From 52ca346f88f78ccf58849a09b20645cc262359ca Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 8 Nov 2023 19:10:27 +0900 Subject: [PATCH 01/12] =?UTF-8?q?[Scrap]=20JS=20=EA=B8=B0=EB=B0=98=20?= =?UTF-8?q?=EC=8A=A4=ED=81=AC=EB=9E=A9=20=ED=95=A8=EC=88=98=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/group_head.py | 34 +++++++++++++++------------- scrap/local_councils/busan.py | 40 ++------------------------------- scrap/local_councils/incheon.py | 7 +++++- scrap/local_councils/seoul.py | 5 ++++- scrap/utils/requests.py | 40 ++++++++++++++++++++++++++++----- 5 files changed, 65 insertions(+), 61 deletions(-) diff --git a/scrap/group_head.py b/scrap/group_head.py index fc67015..40f44da 100644 --- a/scrap/group_head.py +++ b/scrap/group_head.py @@ -1,10 +1,10 @@ -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.common.by import By -from selenium.webdriver.chrome.options import Options +""" +광역단체장 및 기초단체장 정보를 스크랩합니다. +""" from time import sleep -import os + +from scrap.utils.types import Councilor +from scrap.utils.requests import get_selenium, By def scrap_group_heads( @@ -18,17 +18,19 @@ def scrap_group_heads( metro_heads: list[tuple[str, Councilor]] = [] local_heads: list[tuple[str, Councilor]] = [] - driver_loc = os.popen("which chromedriver").read().strip() - if len(driver_loc) == 0: - raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + browser = get_selenium(url) + + # driver_loc = os.popen("which chromedriver").read().strip() + # if len(driver_loc) == 0: + # raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--no-sandbox") + # chrome_options = Options() + # chrome_options.add_argument("--headless") + # chrome_options.add_argument("--no-sandbox") - webdriver_service = Service(driver_loc) - browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) - browser.get(url) + # webdriver_service = Service(driver_loc) + # browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + # browser.get(url) areas = [ tag.text.strip() @@ -38,7 +40,7 @@ def scrap_group_heads( ] for area in areas: - # print(area) + print(area) browser.find_element( By.CSS_SELECTOR, f"li[data-areaname='{area}']" ).find_element(By.TAG_NAME, "a").click() diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index d4ae3a5..38d1553 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -1,21 +1,9 @@ from urllib.parse import urlparse import os -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.common.by import By -from selenium.webdriver.chrome.options import Options -from time import sleep - -import os -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.common.by import By -from selenium.webdriver.chrome.options import Options -from time import sleep from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup +from scrap.utils.requests import get_soup, get_selenium, By def scrap_26( @@ -443,31 +431,7 @@ def scrap_39( if len(driver_loc) == 0: raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - chrome_options = Options() - 
chrome_options.add_argument("--headless") - chrome_options.add_argument("--no-sandbox") - - webdriver_service = Service(driver_loc) - browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) - browser.get(url) - - councilor_infos = browser.find_elements(By.CSS_SELECTOR, "dl[class='info']") - cur_win = browser.current_window_handle - - for info in councilor_infos: - name_tag = info.find_element(By.TAG_NAME, "span") - name = name_tag.text.strip() if name_tag else "이름 정보 없음" - driver_loc = os.popen("which chromedriver").read().strip() - if len(driver_loc) == 0: - raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - - chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--no-sandbox") - - webdriver_service = Service(driver_loc) - browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) - browser.get(url) + browser = get_selenium(url) councilor_infos = browser.find_elements(By.CSS_SELECTOR, "dl[class='info']") cur_win = browser.current_window_handle diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index 58384c3..cb9c985 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -2,7 +2,12 @@ """ from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.requests import get_soup -from scrap.local_councils.basic import * +from scrap.local_councils.basic import ( + get_profiles, + get_name, + find, + extract_party, +) def scrap_50(url="https://www.icjg.go.kr/council/cnmi0101c") -> ScrapResult: diff --git a/scrap/local_councils/seoul.py b/scrap/local_councils/seoul.py index ae2aead..e530aad 100644 --- a/scrap/local_councils/seoul.py +++ b/scrap/local_councils/seoul.py @@ -1,3 +1,6 @@ +""" +서울특별시 기초의회를 스크랩합니다. (1~25) +""" from urllib.parse import urlparse from scrap.utils.types import CouncilType, Councilor, ScrapResult @@ -183,7 +186,7 @@ def scrap_6(url="http://council.ddm.go.kr/citizen/menu1.asp") -> ScrapResult: profile_info = profile_soup.find("div", class_="profileTxt") if profile_info: - profile_string = profile_info.get_text().strip().split("\xa0") + profile_string = profile_info.get_text().strip().split(" ") idx = profile_string.index("소속정당") party = profile_string[idx + 2] diff --git a/scrap/utils/requests.py b/scrap/utils/requests.py index 16a2135..8123801 100644 --- a/scrap/utils/requests.py +++ b/scrap/utils/requests.py @@ -1,16 +1,24 @@ """ -크롤링 시 공통적으로 사용하는 requests 라이브러리와 bs4 라이브러리를 사용하기 쉽게 모듈화합니다. +크롤링 시 공통적으로 사용하는 requests, bs4, selenium 라이브러리를 사용하기 쉽게 모듈화합니다. 
""" +import os +from html import unescape +from unicodedata import normalize import requests from urllib3.exceptions import InsecureRequestWarning from bs4 import BeautifulSoup -from html import unescape -from unicodedata import normalize + +from selenium.webdriver.chrome.webdriver import WebDriver +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By + # SSL 인증서 검증 경고 무시 requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) # type: ignore # 충청북도 보은군, 강진시에서 타임아웃이 -timeout_time = 60 +TIMEOUT_TIME = 60 def get_soup( @@ -33,8 +41,30 @@ def get_soup( http_headers.update(additional_headers) response = requests.get( - url, verify=verify, headers=http_headers, timeout=timeout_time + url, verify=verify, headers=http_headers, timeout=TIMEOUT_TIME ) response.encoding = encoding sanitized_response = normalize("NFKC", unescape(response.text)) return BeautifulSoup(sanitized_response, "html.parser") + + +def get_selenium(url: str) -> WebDriver: + """ + url을 입력받아 WebDriver 객체를 반환합니다. + selenium 라이브러리를 사용해 JS 기반의 동적이 웹페이지 크롤링이 가능합니다. + WebDriver.click() 함수 호출 이후, time.sleep(1) 실행이 권장됩니다. + + :param url: 크롤링할 페이지의 url입니다.""" + driver_loc = os.popen("which chromedriver").read().strip() + if len(driver_loc) == 0: + raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--no-sandbox") + + webdriver_service = Service(driver_loc) + browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + browser.get(url) + + return browser From 2bcca8528fb4f1e136b053181af15977aab951e2 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 8 Nov 2023 19:28:30 +0900 Subject: [PATCH 02/12] =?UTF-8?q?[Scrap]=20=EA=B4=91=EC=A3=BC=EC=8B=9C?= =?UTF-8?q?=EC=9D=98=ED=9A=8C=20=EC=8A=A4=ED=81=AC=EB=9E=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/metropolitan_council.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scrap/metropolitan_council.py b/scrap/metropolitan_council.py index 1c31cdf..9c136c1 100644 --- a/scrap/metropolitan_council.py +++ b/scrap/metropolitan_council.py @@ -1,7 +1,7 @@ from urllib.parse import urlparse from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup +from scrap.utils.requests import get_soup, get_selenium, By def scrap_metro_1( @@ -139,10 +139,17 @@ def scrap_metro_5(url="https://council.gwangju.go.kr/index.do?PID=029") -> Scrap :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 """ - soup = get_soup(url, verify=False).find("table", class_="data").find("tbody") councilors: list[Councilor] = [] + browser = get_selenium(url) + + for profile in browser.find_elements(By.CSS_SELECTOR, "li[class='item_box']"): + name_tag = profile.find_element(By.CSS_SELECTOR, "li[class='name']") + name = name_tag.text if name_tag else "이름 정보 없음" + + party_tag = profile.find_element(By.CSS_SELECTOR, "li[class='item PA']") + party = party_tag.text if party_tag else "정당 정보 없음" - # TODO + councilors.append(Councilor(name, party)) return ScrapResult( council_id="gwangju", @@ -534,4 +541,4 @@ def scrap_metro_17( if __name__ == "__main__": - print(scrap_metro_17()) + print(scrap_metro_5()) From e7d90cbb8e6f700ca1e843a00c576b84132f5b9e Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 8 Nov 2023 19:52:38 +0900 Subject: 
[PATCH 03/12] =?UTF-8?q?[Scrap]=20=EC=9D=B8=EC=B2=9C=20=EB=AF=B8?= =?UTF-8?q?=EC=B6=94=ED=99=80=EA=B5=AC=20=EC=8A=A4=ED=81=AC=EB=9E=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/local_councils/busan.py | 4 ---- scrap/local_councils/incheon.py | 26 +++++++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 38d1553..05e1a08 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -427,10 +427,6 @@ def scrap_39( """ councilors: list[Councilor] = [] - driver_loc = os.popen("which chromedriver").read().strip() - if len(driver_loc) == 0: - raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - browser = get_selenium(url) councilor_infos = browser.find_elements(By.CSS_SELECTOR, "dl[class='info']") diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index cb9c985..5e015ae 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -1,7 +1,7 @@ """인천광역시를 스크랩. 50-57번째 의회까지 있음. """ from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup +from scrap.utils.requests import get_soup, get_selenium, By from scrap.local_councils.basic import ( get_profiles, get_name, @@ -79,16 +79,24 @@ def scrap_52( :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 """ - soup = get_soup(url, verify=False) + councilors: list[Councilor] = [] + browser = get_selenium(url) - script = ( - soup.find("div", class_="contents_header") - .find_next("script") - .get_text(strip=True) - ) + for profile in browser.find_elements(By.CSS_SELECTOR, "div[class='career_item']"): + name_tag = profile.find_element( + By.CSS_SELECTOR, "div[class='career_item_name']" + ) + name = name_tag.text.strip().split()[0].strip() if name_tag else "이름 정보 없음" + + party_tag = profile.find_element(By.TAG_NAME, "dl") + party = ( + party_tag.find_element(By.TAG_NAME, "dd").text.strip() + if party_tag + else "정당 정보 없음" + ) - # TODO + councilors.append(Councilor(name, party)) return ScrapResult( council_id="incheon-michuholgu", @@ -257,4 +265,4 @@ def scrap_57(url, args) -> ScrapResult: if __name__ == "__main__": - print(scrap_56()) + print(scrap_52()) From ab86f2a9831d0684fea9d1591b9e1a0067640187 Mon Sep 17 00:00:00 2001 From: Re-st Date: Tue, 7 Nov 2023 00:31:27 +0900 Subject: [PATCH 04/12] =?UTF-8?q?[Scrap]=2057-112=20=EC=A4=91=20=EC=97=90?= =?UTF-8?q?=EB=9F=AC:=20[76,=2078,=2097,=20101,=20106,=20111],=20=EC=B4=9D?= =?UTF-8?q?=206=ED=9A=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 +- scrap/local_councils/basic.py | 8 +- scrap/local_councils/busan.py | 2 - scrap/local_councils/gangwon.py | 64 ++++++++++++- scrap/local_councils/gwangju.py | 155 +++++++++++++++++++++++++++++++- scrap/utils/spreadsheet.py | 42 ++++----- 6 files changed, 243 insertions(+), 31 deletions(-) diff --git a/requirements.txt b/requirements.txt index d7cc0d3..8437846 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,5 @@ pandas==2.1.1 gspread==5.11.2 pymongo==4.5.0 python-dotenv==1.0.0 -openpyxl \ No newline at end of file +openpyxl +selenium \ No newline at end of file diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py index ec74750..413f327 100644 --- a/scrap/local_councils/basic.py +++ b/scrap/local_councils/basic.py @@ -56,6 +56,8 @@ def get_name(profile, element, 
class_, wrapper_element, wrapper_class_): # span 태그 안의 것들을 다 지움 for span in name_tag.find_all("span"): span.decompose() + for a_tag in name_tag.find_all('a'): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음. + a_tag.extract() name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" # name은 길고 그 중 strong태그 안에 이름이 있는 경우. 은평구, 수원시 등. @@ -72,7 +74,11 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_): if keyword in name: # 인천 서구 등 name = name.replace(keyword, "").strip() break - name = name.split(" ")[0] # 이름 뒤에 직책이 따라오는 경우 + maybe_name = name.split()[0] # 이름 뒤에 직책이 따라오는 경우 + if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우 + name = "".join(name.split()[0:2]) + else: + name = maybe_name return name diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 05e1a08..9c2e755 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -1,5 +1,3 @@ -from urllib.parse import urlparse - import os from scrap.utils.types import CouncilType, Councilor, ScrapResult diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 8cad825..40be1b6 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -1,11 +1,71 @@ -from urllib.parse import urlparse -import re +import os +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options from scrap.utils.types import CouncilType, Councilor, ScrapResult, ScrapBasicArgument from scrap.utils.requests import get_soup from scrap.local_councils.basic import * +from scrap.utils.utils import getPartyList +party_keywords = getPartyList() +party_keywords.append("무소속") +def scrap_107( + url="https://council.wonju.go.kr/content/member/memberName.html", +) -> ScrapResult: + """강원도 원주시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """ + councilors: list[Councilor] = [] + + driver_loc = os.popen("which chromedriver").read().strip() + if len(driver_loc) == 0: + raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--no-sandbox") + + webdriver_service = Service(driver_loc) + browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + browser.get(url) + pfs_wrapper = browser.find_element(By.CSS_SELECTOR, "div[id='content']") + councilor_infos = pfs_wrapper.find_elements(By.CSS_SELECTOR, "dl") + for info in councilor_infos: + name_tag = info.find_element(By.CSS_SELECTOR, "dd[class='name']") + name = name_tag.text.split("(")[0].strip() if name_tag else "이름 정보 없음" + if len(name) > 3: + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() + party_tag = info.find_elements(By.TAG_NAME, "dd") + for tag in party_tag: + party = tag.text.split(" ")[-1] + if party in party_keywords: + break + if party not in party_keywords: + party = "정당 정보 없음" + + councilors.append(Councilor(name, party)) + + return ScrapResult( + council_id="107", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) +# 107: ScrapBasicArgument( +# pf_memlistelt="div", +# pf_memlistcls="content", +# pf_elt="dl", +# name_elt="dd", +# name_cls="name", +# pty_elt="span", +# ), def scrap_113( url="https://sokchocl.go.kr/kr/member/active.do", args: ScrapBasicArgument = None ) -> ScrapResult: diff --git 
a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index a162b4a..28a8f48 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -1,5 +1,156 @@ """광주광역시를 스크랩. 60-64번째 의회까지 있음. """ +import os +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options + from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -from scrap.local_councils.basic import * +from scrap.utils.utils import getPartyList +party_keywords = getPartyList() +party_keywords.append("무소속") + +def scrap_62( + url="http://www.gjnc.or.kr/main/contents/lawmakerDistrict", +) -> ScrapResult: + """광주시 서구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """ + councilors: list[Councilor] = [] + + driver_loc = os.popen("which chromedriver").read().strip() + if len(driver_loc) == 0: + raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--no-sandbox") + + webdriver_service = Service(driver_loc) + browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + browser.get(url) + + councilor_infos = browser.find_elements(By.CSS_SELECTOR, "div[class='con']") + cur_win = browser.current_window_handle + + for info in councilor_infos: + name_tag = info.find_element(By.TAG_NAME, "strong") + name = name_tag.text.strip() if name_tag else "이름 정보 없음" + homepage_link = info.find_element(By.TAG_NAME, "a") + homepage_link.click() + browser.switch_to.window( + [win for win in browser.window_handles if win != cur_win][0] + ) + + party_tag = browser.find_elements(By.TAG_NAME, "dd") + for tag in party_tag: + party = tag.text.strip() + if party in party_keywords: + break + if party not in party_keywords: + party = "정당 정보 없음" + + browser.close() + browser.switch_to.window(cur_win) + + councilors.append(Councilor(name, party)) + + return ScrapResult( + council_id="62", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + +def scrap_63( + url="https://council.bukgu.gwangju.kr/index.do?PID=024", +) -> ScrapResult: + """광주시 북구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """ + councilors: list[Councilor] = [] + + driver_loc = os.popen("which chromedriver").read().strip() + if len(driver_loc) == 0: + raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--no-sandbox") + + webdriver_service = Service(driver_loc) + browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + browser.get(url) + + councilor_infos = browser.find_elements(By.CSS_SELECTOR, "ul[class='info']") + + for info in councilor_infos: + name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element(By.TAG_NAME, "h5") + name = name_tag.text.strip() if name_tag else "이름 정보 없음" + party_tag = info.find_elements(By.TAG_NAME, "dd") + for tag in party_tag: + party = tag.text.strip() + if party in party_keywords: + break + if party not in party_keywords: + party = "정당 정보 없음" + + councilors.append(Councilor(name, party)) + + return ScrapResult( + council_id="63", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + +def scrap_64( + url="https://gjgc.or.kr/main/contents/lawmaker", +) -> 
ScrapResult: + """광주시 광산구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """ + councilors: list[Councilor] = [] + + driver_loc = os.popen("which chromedriver").read().strip() + if len(driver_loc) == 0: + raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") + + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--no-sandbox") + + webdriver_service = Service(driver_loc) + browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) + browser.get(url) + + councilor_infos = browser.find_elements(By.CSS_SELECTOR, "div[class='con']") + + for info in councilor_infos: + name_tag = info.find_element(By.TAG_NAME, "strong") + name = name_tag.text.strip() if name_tag else "이름 정보 없음" + if len(name) > 3: + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() + party_tag = info.find_elements(By.TAG_NAME, "dd") + for tag in party_tag: + party = tag.text.replace(" ", "") + if party in party_keywords: + break + if party not in party_keywords: + party = "정당 정보 없음" + + councilors.append(Councilor(name, party)) + + return ScrapResult( + council_id="64", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) \ No newline at end of file diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 4498955..12ee93d 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -61,10 +61,12 @@ def main() -> None: euc_kr = [6, 13, 16, 31, 72, 88, 112, 134, 154, 157, 163, 165, 167, 181, 197, 202] special_functions = ( list(range(1, 57)) - + [57, 88, 103] + + [62, 63, 64, 88, 103, 107] + list(range(113, 127)) + [132, 134, 140, 142, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 167] ) + no_information = [106, 111] + errors = [] args = { 2: ScrapBasicArgument( pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" @@ -75,11 +77,10 @@ def main() -> None: # 인천 57: ScrapBasicArgument( pf_elt="div", - pf_cls="box", + pf_cls="conbox", name_elt="p", - name_cls="mem_tit2", - pty_elt="p", - pty_cls="mem_tit2", + name_cls="name", + pty_elt="li", ), 58: ScrapBasicArgument( pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" @@ -98,9 +99,7 @@ def main() -> None: 61: ScrapBasicArgument( pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" ), - # 62 : TODO! /common/selectCouncilMemberProfile.json 을 어떻게 얻을지.. - # 63 : TODO! 홈페이지 터짐 - # 64 : TODO! /common/selectCouncilMemberProfile.json 을 어떻게 얻을지.. + # 62 - 64 : gwangju.py # 대전 65: ScrapBasicArgument( pf_elt="dl", @@ -372,16 +371,8 @@ def main() -> None: pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" ), # 강원 - # 106 : TODO! 정당정보 없음 - # TODO! 
107이 get_soup에서 실패 중 - HTTPSConnectionPool(host='council.wonju.go.kr', port=443): Max retries exceeded with url: /content/member/memberName.html (Caused by SSLError(SSLError(1, '[SSL: DH_KEY_TOO_SMALL] dh key too small (_ssl.c:1007)'))) - 107: ScrapBasicArgument( - pf_memlistelt="div", - pf_memlistcls="content", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - ), + # 106 : 정당정보 없음 + # 107 : scrap_gangwon.py 108: ScrapBasicArgument( pf_elt="dl", pf_cls="profile", name_elt="strong", pty_elt="li" ), @@ -625,11 +616,15 @@ def main() -> None: data: list[dict] = worksheet.get_all_records() result: str = "" - error_times = 0 parse_error_times = 0 timeouts = 0 N = 226 - for n in range(1, 227): + for n in range(57, 113): + if n in no_information: + print(f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다."\ + "다시 확인해보시겠어요? 링크 : ", data[n - 1]["URL"]) + errors.append(n) + continue encoding = "euc-kr" if n in euc_kr else "utf-8" result = None try: @@ -640,7 +635,7 @@ def main() -> None: function_name = f"scrap_{n}" if hasattr(sys.modules[__name__], function_name): function_to_call = getattr(sys.modules[__name__], function_name) - if n < 57: + if n < 57 or n in [62, 63, 64, 107]: result = str(function_to_call(council_url).councilors) else: result = str( @@ -653,16 +648,17 @@ def main() -> None: if "정보 없음" in result: print("정보 없음이 포함되어 있습니다.") parse_error_times += 1 + errors.append(n) print(f"| {n} | {result}") except Timeout: print(f"| {n} | 오류: Request to {council_url} timed out.") timeouts += 1 except Exception as e: print(f"| {n} | 오류: {e}") - error_times += 1 + errors.append(n) continue # 에러가 발생하면 다음 반복으로 넘어감 print( - f"| 총 실행 횟수: {N} | 에러 횟수: {error_times} | 정보 없음 횟수: {parse_error_times} | 타임아웃 횟수: {timeouts} |" + f"| 총 실행 횟수: {N} | 에러: {errors}, 총 {len(errors)}회 | 그 중 정보 없음 횟수: {parse_error_times} | 타임아웃 횟수: {timeouts} |" ) From 77f93167c1f520c344ac9be529603637d0d3fc81 Mon Sep 17 00:00:00 2001 From: Re-st Date: Mon, 6 Nov 2023 15:32:20 +0000 Subject: [PATCH 05/12] Formatted with black --- scrap/local_councils/basic.py | 4 ++-- scrap/local_councils/busan.py | 1 + scrap/local_councils/gangwon.py | 12 ++++++++---- scrap/local_councils/gwangju.py | 18 ++++++++++++------ scrap/utils/spreadsheet.py | 6 ++++-- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py index 413f327..b71e162 100644 --- a/scrap/local_councils/basic.py +++ b/scrap/local_councils/basic.py @@ -56,7 +56,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_): # span 태그 안의 것들을 다 지움 for span in name_tag.find_all("span"): span.decompose() - for a_tag in name_tag.find_all('a'): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음. + for a_tag in name_tag.find_all("a"): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음. 
a_tag.extract() name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -75,7 +75,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_): name = name.replace(keyword, "").strip() break maybe_name = name.split()[0] # 이름 뒤에 직책이 따라오는 경우 - if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우 + if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우 name = "".join(name.split()[0:2]) else: name = maybe_name diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 9c2e755..af8987a 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -4,6 +4,7 @@ from scrap.utils.requests import get_soup, get_selenium, By + def scrap_26( url="https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil", ) -> ScrapResult: diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 40be1b6..326920f 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -8,9 +8,11 @@ from scrap.utils.requests import get_soup from scrap.local_councils.basic import * from scrap.utils.utils import getPartyList + party_keywords = getPartyList() party_keywords.append("무소속") + def scrap_107( url="https://council.wonju.go.kr/content/member/memberName.html", ) -> ScrapResult: @@ -39,10 +41,10 @@ def scrap_107( name_tag = info.find_element(By.CSS_SELECTOR, "dd[class='name']") name = name_tag.text.split("(")[0].strip() if name_tag else "이름 정보 없음" if len(name) > 3: - # 수식어가 이름 앞이나 뒤에 붙어있는 경우 - for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 - if keyword in name: - name = name.replace(keyword, "").strip() + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: party = tag.text.split(" ")[-1] @@ -58,6 +60,8 @@ def scrap_107( council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors, ) + + # 107: ScrapBasicArgument( # pf_memlistelt="div", # pf_memlistcls="content", diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index 28a8f48..b0c635c 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -8,9 +8,11 @@ from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.utils import getPartyList + party_keywords = getPartyList() party_keywords.append("무소속") + def scrap_62( url="http://www.gjnc.or.kr/main/contents/lawmakerDistrict", ) -> ScrapResult: @@ -64,6 +66,7 @@ def scrap_62( councilors=councilors, ) + def scrap_63( url="https://council.bukgu.gwangju.kr/index.do?PID=024", ) -> ScrapResult: @@ -89,7 +92,9 @@ def scrap_63( councilor_infos = browser.find_elements(By.CSS_SELECTOR, "ul[class='info']") for info in councilor_infos: - name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element(By.TAG_NAME, "h5") + name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element( + By.TAG_NAME, "h5" + ) name = name_tag.text.strip() if name_tag else "이름 정보 없음" party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: @@ -107,6 +112,7 @@ def scrap_63( councilors=councilors, ) + def scrap_64( url="https://gjgc.or.kr/main/contents/lawmaker", ) -> ScrapResult: @@ -135,10 +141,10 @@ def scrap_64( name_tag = info.find_element(By.TAG_NAME, "strong") name = name_tag.text.strip() if name_tag else "이름 정보 없음" if len(name) > 3: - # 수식어가 이름 앞이나 뒤에 붙어있는 경우 - for keyword in 
["부의장", "의원", "의장"]: # 119, 강서구 등 - if keyword in name: - name = name.replace(keyword, "").strip() + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: party = tag.text.replace(" ", "") @@ -153,4 +159,4 @@ def scrap_64( council_id="64", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors, - ) \ No newline at end of file + ) diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 12ee93d..602cb13 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -621,8 +621,10 @@ def main() -> None: N = 226 for n in range(57, 113): if n in no_information: - print(f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다."\ - "다시 확인해보시겠어요? 링크 : ", data[n - 1]["URL"]) + print( + f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다." "다시 확인해보시겠어요? 링크 : ", + data[n - 1]["URL"], + ) errors.append(n) continue encoding = "euc-kr" if n in euc_kr else "utf-8" From 26d84131e2d9bacdcaa5d5d1093ce945279ba384 Mon Sep 17 00:00:00 2001 From: Re-st Date: Tue, 7 Nov 2023 02:16:37 +0900 Subject: [PATCH 06/12] =?UTF-8?q?[analysis]=20v1.0=20=EC=99=84=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/age/__init__.py | 3 + analysis/age/draw.py | 68 +++++++++++++ analysis/age/hist_groups.py | 137 ++++++++++++++++++++++++++ analysis/age/main.py | 37 +++++++ analysis/age/most_common_age_group.py | 23 +++++ analysis/age_group.py | 118 ---------------------- 6 files changed, 268 insertions(+), 118 deletions(-) create mode 100644 analysis/age/__init__.py create mode 100644 analysis/age/draw.py create mode 100644 analysis/age/hist_groups.py create mode 100644 analysis/age/main.py create mode 100644 analysis/age/most_common_age_group.py delete mode 100644 analysis/age_group.py diff --git a/analysis/age/__init__.py b/analysis/age/__init__.py new file mode 100644 index 0000000..ccad035 --- /dev/null +++ b/analysis/age/__init__.py @@ -0,0 +1,3 @@ +""" +공공데이터포털 API로 수집한 데이터를 분석하기 위한 패키지입니다. +""" diff --git a/analysis/age/draw.py b/analysis/age/draw.py new file mode 100644 index 0000000..a304ca1 --- /dev/null +++ b/analysis/age/draw.py @@ -0,0 +1,68 @@ +import os +import seaborn as sns +import numpy as np +import matplotlib.pyplot as plt +from matplotlib import pyplot as plt + +def make_scatterplot(package): + (outdir, total_population_count, year, df_age, n_clst, method, cluster_by, folder_name, colors, font_name) = package + # 산점도로 클러스터링 결과를 시각화합니다. + sns.set(style="whitegrid") # Seaborn 스타일 설정 (선택적) + plt.figure(figsize=(10, 6)) # 그림 크기 설정 (선택적) + print(df_age) + sns.scatterplot( + data=df_age, x="age", y="area", palette="viridis" + ) + # 클러스터 중심 나이를 플롯에 추가합니다. + for _, row in df_age.iterrows(): + area = row['area'] + age = row['age'] + print(age) + plt.text(age, area, "{:.2f}".format(float(age)), fontsize=12, ha="right", fontname = font_name) + plt.xlabel("나이", fontname = font_name) + plt.ylabel("지역", fontname = font_name) + plt.yticks(fontname=font_name) + plt.title(f"{folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + # 그래프를 이미지 파일로 저장합니다. 
+ plt.savefig(os.path.join(outdir, method, f"clustering_result ({year}).png"), dpi=300) # 파일 이름 및 해상도 설정 (선택적) + plt.close() + +def plot_eachgroup(df, n_clst, colors): + minage = min(df["age"].min(), 20) + maxage = max(df["age"].max(), 80) + for i in range(n_clst): + clst_data = df[df["cluster_label"] == i] + sns.histplot(data=clst_data, + x="age", + kde=False, + label=f"Cluster {i}", + color=colors[i], + element="step", + bins=range(minage, maxage, 1)) + # 몇 명인지 프린트하기 + print(f"Cluster {i}: {clst_data.shape[0]} people") + # 그룹마다 몇 살인지 프린트하기 + print(f"Cluster {i}: {clst_data['age']}") + +def make_hist(package): + (outdir, df, year, area, n_clst, method, cluster_by, folder_name, colors, font_name) = package + plt.figure(figsize=(10, 6)) + # 시각화 + # plot_young_and_old(yb_clst, ob_clst) + plot_eachgroup(df, n_clst, colors) + total_population_count = df[df[cluster_by] == area].shape[0] + if cluster_by == "sdName": + plt.title(f"{area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + elif cluster_by == "wiwName": + sdName = df[df["wiwName"] == area]["sdName"].iloc[0] + plt.title(f"{sdName} {area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + else: + print("cluster_by를 sdName 또는 wiwName으로 설정해주세요.") + return + plt.xlabel("나이", fontname = font_name) + plt.ylabel("인원 수", fontname = font_name) + max_ppl_in_age = df["age"].value_counts().max() + plt.yticks(np.arange(0, max(10, max_ppl_in_age), step=5), fontsize=12) + plt.savefig(os.path.join(outdir, method, f"{year}-{area}.png")) + plt.close() + print(f"Saved ", os.path.join(outdir, method, f"{year}-{area}.png")) \ No newline at end of file diff --git a/analysis/age/hist_groups.py b/analysis/age/hist_groups.py new file mode 100644 index 0000000..6f8679c --- /dev/null +++ b/analysis/age/hist_groups.py @@ -0,0 +1,137 @@ +# coding=utf-8 +import os +import seaborn as sns +import numpy as np +import pandas as pd +from sklearn.cluster import KMeans +from matplotlib import cm +from analysis.age.draw import make_scatterplot, make_hist + +def plot_young_and_old(youngest_cluster, oldest_cluster): + try: + sns.histplot(data=youngest_cluster, + x="age", + kde=True, + label="Youngest Cluster", + color="blue", + element="step", + bins=range(youngest_cluster['age'].min(), + youngest_cluster['age'].max() + 1, 1)) + except: + pass + try: + sns.histplot(data=oldest_cluster, + x="age", + kde=True, + label="Oldest Cluster", + color="red", + element="step", + bins=range(oldest_cluster['age'].min(), + oldest_cluster['age'].max() + 1, 1)) + except: + pass + +def cluster_data(method, n_clst, df): + if method == "kmeans": + ages_data = df[["age"]] + # K-means 모델을 초기화하고 학습합니다. + kmeans = KMeans(n_clusters= min(n_clst, len(ages_data)), random_state=0) + kmeans.fit(ages_data) + + # 각 데이터 포인트가 속한 클러스터를 나타내는 레이블을 가져옵니다. + clst_labels = kmeans.labels_ + elif method == "equal": + clst_labels = [] + clst_labels = np.repeat(np.arange(n_clst), len(df) // n_clst) + clst_labels = np.append(clst_labels, np.arange(len(df) % n_clst)) + clst_labels.sort() + clst_labels = np.array(clst_labels) + df["cluster_label"] = clst_labels + # 같은 나이는 같은 클러스터에 속하도록 합니다. + # 0번 클러스터는 생기도록 합니다. 
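+    # A worked example with hypothetical sizes: for len(df) == 10 and n_clst == 3,
+    # np.repeat gives [0,0,0,1,1,1,2,2,2], np.append adds the remainder [0], and
+    # after sorting the age-sorted rows carry labels [0,0,0,0,1,1,1,2,2,2], so
+    # leftover rows land in the lowest-numbered clusters. The loops below then
+    # move every row that shares a boundary age into a single cluster.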
+ for i in [0]: + max_age = df[df["cluster_label"] == i]["age"].max() + # when "age" == max_age, change "cluster_label" to be i + df.loc[df["age"] == max_age, "cluster_label"] = i + for i in range(2, n_clst): + min_age = df[df["cluster_label"] == i]["age"].min() + # when "age" == min_age, change "cluster_label" to be i + df.loc[df["age"] == min_age, "cluster_label"] = i + return df + +def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name): + """구역별 그룹을 만듭니다. + df: 데이터프레임 + year: 선거 연도 + n_clusters: 그룹 수 + """ + os.makedirs(os.path.join(outdir, method), exist_ok=True) + youngest_age = ('', 100) + oldest_age = ('', 0) + print(f"({year}), {n_clst} clusters") + print(f"{'-' * 20}") + # Get a colormap for generating unique colors for clusters + colors = cm.rainbow(np.linspace(0, 1, n_clst)) + + # 데이터프레임에서 시도별로 묶은 후 나이 열만 가져옵니다. + df_age = pd.DataFrame(columns=['area', 'age']) + for area, df_clst in df.groupby(cluster_by): + df_clst = cluster_data(method, n_clst, df_clst) + # 클러스터 중심 나이를 계산합니다. + clst_age_mean = [] + for i in range(n_clst): + clst_data = df_clst[df_clst["cluster_label"] == i] + cluster_center_age = round( + clst_data["age"].mean(), 2 + ) # 나이를 소수점 2자리까지 반올림 + clst_age_mean.append(cluster_center_age) + + clst_of_young = clst_age_mean.index(min(clst_age_mean)) + clst_of_old = clst_age_mean.index(max(clst_age_mean)) + clst_age_mean.sort() + new_data = pd.DataFrame({'area': area, 'age': clst_age_mean + }) + df_age = pd.concat([df_age, new_data], ignore_index=True) + print(clst_age_mean) + + yb_clst = df_clst[df_clst["cluster_label"] == clst_of_young] + ob_clst = df_clst[df_clst["cluster_label"] == clst_of_old] + print( + f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}" + ) + print( + f"Oldest in {area}: {ob_clst['age'].min()} - {ob_clst['age'].max()}" + ) + if clst_age_mean[0] < youngest_age[1]: + youngest_age = (area, clst_age_mean[0]) + if clst_age_mean[-1] > oldest_age[1]: + oldest_age = (area, clst_age_mean[-1]) + + # 그룹의 성비를 계산합니다. 
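+        # Both ratios below are the share of rows whose gender column equals "여"
+        # (female) within the youngest/oldest cluster; e.g. 3 such rows out of a
+        # 10-person cluster give 0.3 (the counts here are illustrative only).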
+ young_group_sexratio = ( + yb_clst[yb_clst["gender"] == "여"].shape[0] + / yb_clst.shape[0] + ) + old_group_sexratio = ( + ob_clst[ob_clst["gender"] == "여"].shape[0] + / ob_clst.shape[0] + ) + print(f"젊은 층의 성비는 여자가 {young_group_sexratio}, 노인층의 성비는 여자가 {old_group_sexratio}") + + # 그리기 + package = (outdir, df_clst, year, area, n_clst, method, cluster_by, folder_name, colors, font_name) + make_hist(package) + + print(f"Number of data points per cluster for {area}") + for cluster_label in range(n_clst): + closest_data_count = sum(df_clst["cluster_label"] == cluster_label) + print( + f"Cluster {cluster_label}: Age {clst_age_mean[cluster_label]}, {closest_data_count} closest data points" + ) + print(f"Youngest in {youngest_age[0]}: {youngest_age[1]}") + print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}") + + # 그리기 + package = (outdir, df.shape[0], year, df_age, n_clst, method, cluster_by, folder_name, colors, font_name) + make_scatterplot(package) + \ No newline at end of file diff --git a/analysis/age/main.py b/analysis/age/main.py new file mode 100644 index 0000000..205d276 --- /dev/null +++ b/analysis/age/main.py @@ -0,0 +1,37 @@ +# coding=utf-8 +import pandas as pd +import os +import warnings +from matplotlib import font_manager +from analysis.age.most_common_age_group import most_common_age_group +from analysis.age.hist_groups import cluster +# 경고 무시 +warnings.filterwarnings("ignore", category=FutureWarning) + +BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir) +# matplotlib 한국어 폰트 설정 +font_name = font_manager.FontProperties(fname=os.path.join(BASE_DIR, "_data", "NanumSquareL.ttf")).get_name() + +def main(): + for folder_name in ["지선-당선", "지선-후보"]: + for cluster_by in ["sdName", "wiwName"]: + # folder_name = input("_data 내의 폴더 이름은 무엇인가요?") + # cluster_by = input("구역을 나눌 기준을 입력해주세요 (sdName 즉 시/도 또는 wiwName 즉 기초단체단위): ") + datadir = os.path.join(BASE_DIR, "_data", folder_name) + outdir = os.path.join(BASE_DIR, "output", f"age_all_{cluster_by}", folder_name) + + for d in os.listdir(datadir): + # xlsx 파일을 읽어옵니다. + if not d.endswith(".xlsx"): + continue + df = pd.read_excel(os.path.join(datadir, d)) + + # 필요한 열만 추출합니다. + df = df[["sdName", "wiwName", "name", "age", "gender"]] + df = df.sort_values(by="age") + year = d[7:11] + # most_common_age_group(df, year) + cluster(df, year, 7, 'kmeans', cluster_by, outdir, font_name, folder_name) + cluster(df, year, 7, 'equal', cluster_by, outdir, font_name, folder_name) + +main() diff --git a/analysis/age/most_common_age_group.py b/analysis/age/most_common_age_group.py new file mode 100644 index 0000000..bafead3 --- /dev/null +++ b/analysis/age/most_common_age_group.py @@ -0,0 +1,23 @@ +# coding=utf-8 +import pandas as pd + +def most_common_age_group(df, d): + """10년단위로 무리짓고 가장 사람 많은 무리 출력. + df: 데이터프레임 + d: 파일 이름""" + age_groups = pd.cut( + df["age"], + [0, 30, 40, 50, 60, 70, 80, 90, 100], + labels=["0-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91-100"], + ) + + # 나이 그룹을 데이터프레임에 추가합니다. + df["age_group"] = age_groups + + # 각 구역에서 가장 많은 나이 그룹을 찾습니다. + most_common_age_group_by_region = df.groupby("sdName")["age_group"].agg( + lambda x: x.mode().iloc[0] + ) + + # 결과를 출력합니다. 
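+    # pd.cut above uses right-closed bins, so an age of exactly 47 falls into
+    # (40, 50] and is labeled "41-50"; mode().iloc[0] then keeps the single most
+    # frequent label per sdName (47 is an illustrative value, not from the data).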
+ print(d, most_common_age_group_by_region) diff --git a/analysis/age_group.py b/analysis/age_group.py deleted file mode 100644 index 4e009c9..0000000 --- a/analysis/age_group.py +++ /dev/null @@ -1,118 +0,0 @@ -# coding=utf-8 -import pandas as pd -import os -import warnings -import seaborn as sns -import matplotlib.pyplot as plt - -from sklearn.cluster import KMeans -from sklearn.metrics import pairwise_distances -from matplotlib import font_manager, rc - -# 경고 무시 -warnings.filterwarnings("ignore", category=FutureWarning) - -BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir) -datadir = os.path.join(BASE_DIR, "_data") -outdir = os.path.join(BASE_DIR, "output") - -# 폰트 경로를 rcParams에 설정합니다. -plt.rcParams["font.family"] = "NanumGothic" -plt.rcParams["axes.unicode_minus"] = False # 마이너스 기호 표시 설정 - -for d in os.listdir(datadir): - # xlsx 파일을 읽어옵니다. - if not d.endswith(".xlsx"): - continue - df = pd.read_excel(os.path.join(datadir, d)) - - # 필요한 열만 추출합니다. - df = df[["sdName", "name", "age"]] - - # 나이를 기반으로 그룹을 만듭니다. - age_groups = pd.cut( - df["age"], - [0, 30, 40, 50, 60, 70, 80, 90, 100], - labels=["0-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91-100"], - ) - - # 나이 그룹을 데이터프레임에 추가합니다. - df["age_group"] = age_groups - - # 각 구역에서 가장 많은 나이 그룹을 찾습니다. - most_common_age_group_by_region = df.groupby("sdName")["age_group"].agg( - lambda x: x.mode().iloc[0] - ) - - # 결과를 출력합니다. - print(d, most_common_age_group_by_region) - - # 각 구역의 나이 평균을 계산합니다. - average_age_by_region = df.groupby("sdName")["age"].mean() - - # 결과를 출력합니다. - print(average_age_by_region) - - # K-means 클러스터링을 위한 데이터를 준비합니다. - data_for_clustering = df[["age"]] - - # 클러스터의 개수 설정 - n_clusters = 5 # 원하는 클러스터 개수를 지정합니다. - - # K-means 모델을 초기화하고 학습합니다. - kmeans = KMeans(n_clusters=n_clusters, random_state=0) - kmeans.fit(data_for_clustering) - - # 각 데이터 포인트가 속한 클러스터를 나타내는 레이블을 가져옵니다. - cluster_labels = kmeans.labels_ - - # 클러스터 중심 나이를 계산합니다. - cluster_centers_age = [] - for i in range(n_clusters): - cluster_data = df[cluster_labels == i] - cluster_center_age = round(cluster_data["age"].mean(), 2) # 나이를 소수점 2자리까지 반올림 - cluster_centers_age.append(cluster_center_age) - - # 결과를 출력합니다. - print(cluster_centers_age) - - # 클러스터링 결과로 얻은 레이블을 데이터프레임에 추가합니다. - df["cluster_label"] = cluster_labels - - # 클러스터 중심 위치를 가져옵니다. - cluster_centers = kmeans.cluster_centers_ - - # 클러스터링 결과로부터 각 데이터 포인트와 클러스터 중심 간의 거리를 계산합니다. - distances = pairwise_distances(data_for_clustering, cluster_centers) - - # 각 클러스터에서 가장 가까운 데이터의 인덱스를 찾습니다. - closest_data_indices = distances.argmin(axis=1) - - # 각 클러스터에서 가장 가까운 데이터의 수를 세어 출력합니다. - for cluster_label in range(n_clusters): - closest_data_count = sum(df["cluster_label"] == cluster_label) - print( - f"Cluster {cluster_label}: Age {cluster_centers_age[cluster_label]}, {closest_data_count} closest data points" - ) - - # 산점도로 클러스터링 결과를 시각화합니다. - sns.set(style="whitegrid") # Seaborn 스타일 설정 (선택적) - plt.figure(figsize=(10, 6)) # 그림 크기 설정 (선택적) - - sns.scatterplot( - data=df, x="age", y="sdName", hue="cluster_label", palette="viridis" - ) - - # 클러스터 중심 나이를 플롯에 추가합니다. - for i, age in enumerate(cluster_centers_age): - plt.text(age, i, f"Cluster {i}: {age:.2f}", fontsize=12, ha="right") - - plt.xlabel("나이") - plt.ylabel("지역") - plt.title("K-means 클러스터링 결과") - plt.legend(title="클러스터") - # 그래프를 이미지 파일로 저장합니다. 
- pngpath = os.path.join(outdir, "clustering_result.png") - plt.savefig(pngpath, dpi=300) # 파일 이름 및 해상도 설정 (선택적) - - break From d1882067f22ed5266bc0fc1e3b4ced36680bf83c Mon Sep 17 00:00:00 2001 From: Re-st Date: Mon, 6 Nov 2023 17:17:14 +0000 Subject: [PATCH 07/12] Formatted with black --- analysis/age/draw.py | 93 +++++++++++++++++------- analysis/age/hist_groups.py | 101 ++++++++++++++++---------- analysis/age/main.py | 19 ++++- analysis/age/most_common_age_group.py | 1 + 4 files changed, 145 insertions(+), 69 deletions(-) diff --git a/analysis/age/draw.py b/analysis/age/draw.py index a304ca1..6c00aeb 100644 --- a/analysis/age/draw.py +++ b/analysis/age/draw.py @@ -4,65 +4,108 @@ import matplotlib.pyplot as plt from matplotlib import pyplot as plt + def make_scatterplot(package): - (outdir, total_population_count, year, df_age, n_clst, method, cluster_by, folder_name, colors, font_name) = package - # 산점도로 클러스터링 결과를 시각화합니다. + ( + outdir, + total_population_count, + year, + df_age, + n_clst, + method, + cluster_by, + folder_name, + colors, + font_name, + ) = package + # 산점도로 클러스터링 결과를 시각화합니다. sns.set(style="whitegrid") # Seaborn 스타일 설정 (선택적) plt.figure(figsize=(10, 6)) # 그림 크기 설정 (선택적) print(df_age) - sns.scatterplot( - data=df_age, x="age", y="area", palette="viridis" - ) + sns.scatterplot(data=df_age, x="age", y="area", palette="viridis") # 클러스터 중심 나이를 플롯에 추가합니다. for _, row in df_age.iterrows(): - area = row['area'] - age = row['age'] + area = row["area"] + age = row["age"] print(age) - plt.text(age, area, "{:.2f}".format(float(age)), fontsize=12, ha="right", fontname = font_name) - plt.xlabel("나이", fontname = font_name) - plt.ylabel("지역", fontname = font_name) + plt.text( + age, + area, + "{:.2f}".format(float(age)), + fontsize=12, + ha="right", + fontname=font_name, + ) + plt.xlabel("나이", fontname=font_name) + plt.ylabel("지역", fontname=font_name) plt.yticks(fontname=font_name) - plt.title(f"{folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + plt.title( + f"{folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", + fontname=font_name, + ) # 그래프를 이미지 파일로 저장합니다. 
- plt.savefig(os.path.join(outdir, method, f"clustering_result ({year}).png"), dpi=300) # 파일 이름 및 해상도 설정 (선택적) + plt.savefig( + os.path.join(outdir, method, f"clustering_result ({year}).png"), dpi=300 + ) # 파일 이름 및 해상도 설정 (선택적) plt.close() + def plot_eachgroup(df, n_clst, colors): minage = min(df["age"].min(), 20) maxage = max(df["age"].max(), 80) for i in range(n_clst): clst_data = df[df["cluster_label"] == i] - sns.histplot(data=clst_data, - x="age", - kde=False, - label=f"Cluster {i}", - color=colors[i], - element="step", - bins=range(minage, maxage, 1)) + sns.histplot( + data=clst_data, + x="age", + kde=False, + label=f"Cluster {i}", + color=colors[i], + element="step", + bins=range(minage, maxage, 1), + ) # 몇 명인지 프린트하기 print(f"Cluster {i}: {clst_data.shape[0]} people") # 그룹마다 몇 살인지 프린트하기 print(f"Cluster {i}: {clst_data['age']}") + def make_hist(package): - (outdir, df, year, area, n_clst, method, cluster_by, folder_name, colors, font_name) = package + ( + outdir, + df, + year, + area, + n_clst, + method, + cluster_by, + folder_name, + colors, + font_name, + ) = package plt.figure(figsize=(10, 6)) # 시각화 # plot_young_and_old(yb_clst, ob_clst) plot_eachgroup(df, n_clst, colors) total_population_count = df[df[cluster_by] == area].shape[0] if cluster_by == "sdName": - plt.title(f"{area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + plt.title( + f"{area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", + fontname=font_name, + ) elif cluster_by == "wiwName": sdName = df[df["wiwName"] == area]["sdName"].iloc[0] - plt.title(f"{sdName} {area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", fontname = font_name) + plt.title( + f"{sdName} {area} {folder_name}자 나이 분포 ({year}) <총 {total_population_count}명>", + fontname=font_name, + ) else: print("cluster_by를 sdName 또는 wiwName으로 설정해주세요.") return - plt.xlabel("나이", fontname = font_name) - plt.ylabel("인원 수", fontname = font_name) + plt.xlabel("나이", fontname=font_name) + plt.ylabel("인원 수", fontname=font_name) max_ppl_in_age = df["age"].value_counts().max() plt.yticks(np.arange(0, max(10, max_ppl_in_age), step=5), fontsize=12) plt.savefig(os.path.join(outdir, method, f"{year}-{area}.png")) plt.close() - print(f"Saved ", os.path.join(outdir, method, f"{year}-{area}.png")) \ No newline at end of file + print(f"Saved ", os.path.join(outdir, method, f"{year}-{area}.png")) diff --git a/analysis/age/hist_groups.py b/analysis/age/hist_groups.py index 6f8679c..261339e 100644 --- a/analysis/age/hist_groups.py +++ b/analysis/age/hist_groups.py @@ -7,35 +7,41 @@ from matplotlib import cm from analysis.age.draw import make_scatterplot, make_hist + def plot_young_and_old(youngest_cluster, oldest_cluster): try: - sns.histplot(data=youngest_cluster, - x="age", - kde=True, - label="Youngest Cluster", - color="blue", - element="step", - bins=range(youngest_cluster['age'].min(), - youngest_cluster['age'].max() + 1, 1)) + sns.histplot( + data=youngest_cluster, + x="age", + kde=True, + label="Youngest Cluster", + color="blue", + element="step", + bins=range( + youngest_cluster["age"].min(), youngest_cluster["age"].max() + 1, 1 + ), + ) except: pass try: - sns.histplot(data=oldest_cluster, - x="age", - kde=True, - label="Oldest Cluster", - color="red", - element="step", - bins=range(oldest_cluster['age'].min(), - oldest_cluster['age'].max() + 1, 1)) + sns.histplot( + data=oldest_cluster, + x="age", + kde=True, + label="Oldest Cluster", + color="red", + element="step", + 
bins=range(oldest_cluster["age"].min(), oldest_cluster["age"].max() + 1, 1), + ) except: pass + def cluster_data(method, n_clst, df): if method == "kmeans": ages_data = df[["age"]] # K-means 모델을 초기화하고 학습합니다. - kmeans = KMeans(n_clusters= min(n_clst, len(ages_data)), random_state=0) + kmeans = KMeans(n_clusters=min(n_clst, len(ages_data)), random_state=0) kmeans.fit(ages_data) # 각 데이터 포인트가 속한 클러스터를 나타내는 레이블을 가져옵니다. @@ -59,6 +65,7 @@ def cluster_data(method, n_clst, df): df.loc[df["age"] == min_age, "cluster_label"] = i return df + def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name): """구역별 그룹을 만듭니다. df: 데이터프레임 @@ -66,42 +73,35 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name n_clusters: 그룹 수 """ os.makedirs(os.path.join(outdir, method), exist_ok=True) - youngest_age = ('', 100) - oldest_age = ('', 0) + youngest_age = ("", 100) + oldest_age = ("", 0) print(f"({year}), {n_clst} clusters") print(f"{'-' * 20}") # Get a colormap for generating unique colors for clusters colors = cm.rainbow(np.linspace(0, 1, n_clst)) # 데이터프레임에서 시도별로 묶은 후 나이 열만 가져옵니다. - df_age = pd.DataFrame(columns=['area', 'age']) + df_age = pd.DataFrame(columns=["area", "age"]) for area, df_clst in df.groupby(cluster_by): df_clst = cluster_data(method, n_clst, df_clst) # 클러스터 중심 나이를 계산합니다. clst_age_mean = [] for i in range(n_clst): clst_data = df_clst[df_clst["cluster_label"] == i] - cluster_center_age = round( - clst_data["age"].mean(), 2 - ) # 나이를 소수점 2자리까지 반올림 + cluster_center_age = round(clst_data["age"].mean(), 2) # 나이를 소수점 2자리까지 반올림 clst_age_mean.append(cluster_center_age) clst_of_young = clst_age_mean.index(min(clst_age_mean)) clst_of_old = clst_age_mean.index(max(clst_age_mean)) clst_age_mean.sort() - new_data = pd.DataFrame({'area': area, 'age': clst_age_mean - }) + new_data = pd.DataFrame({"area": area, "age": clst_age_mean}) df_age = pd.concat([df_age, new_data], ignore_index=True) print(clst_age_mean) yb_clst = df_clst[df_clst["cluster_label"] == clst_of_young] ob_clst = df_clst[df_clst["cluster_label"] == clst_of_old] - print( - f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}" - ) - print( - f"Oldest in {area}: {ob_clst['age'].min()} - {ob_clst['age'].max()}" - ) + print(f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}") + print(f"Oldest in {area}: {ob_clst['age'].min()} - {ob_clst['age'].max()}") if clst_age_mean[0] < youngest_age[1]: youngest_age = (area, clst_age_mean[0]) if clst_age_mean[-1] > oldest_age[1]: @@ -109,17 +109,28 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name # 그룹의 성비를 계산합니다. 
young_group_sexratio = ( - yb_clst[yb_clst["gender"] == "여"].shape[0] - / yb_clst.shape[0] + yb_clst[yb_clst["gender"] == "여"].shape[0] / yb_clst.shape[0] ) old_group_sexratio = ( - ob_clst[ob_clst["gender"] == "여"].shape[0] - / ob_clst.shape[0] + ob_clst[ob_clst["gender"] == "여"].shape[0] / ob_clst.shape[0] + ) + print( + f"젊은 층의 성비는 여자가 {young_group_sexratio}, 노인층의 성비는 여자가 {old_group_sexratio}" ) - print(f"젊은 층의 성비는 여자가 {young_group_sexratio}, 노인층의 성비는 여자가 {old_group_sexratio}") - + # 그리기 - package = (outdir, df_clst, year, area, n_clst, method, cluster_by, folder_name, colors, font_name) + package = ( + outdir, + df_clst, + year, + area, + n_clst, + method, + cluster_by, + folder_name, + colors, + font_name, + ) make_hist(package) print(f"Number of data points per cluster for {area}") @@ -132,6 +143,16 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}") # 그리기 - package = (outdir, df.shape[0], year, df_age, n_clst, method, cluster_by, folder_name, colors, font_name) + package = ( + outdir, + df.shape[0], + year, + df_age, + n_clst, + method, + cluster_by, + folder_name, + colors, + font_name, + ) make_scatterplot(package) - \ No newline at end of file diff --git a/analysis/age/main.py b/analysis/age/main.py index 205d276..bdbeb37 100644 --- a/analysis/age/main.py +++ b/analysis/age/main.py @@ -5,12 +5,16 @@ from matplotlib import font_manager from analysis.age.most_common_age_group import most_common_age_group from analysis.age.hist_groups import cluster + # 경고 무시 warnings.filterwarnings("ignore", category=FutureWarning) BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir) # matplotlib 한국어 폰트 설정 -font_name = font_manager.FontProperties(fname=os.path.join(BASE_DIR, "_data", "NanumSquareL.ttf")).get_name() +font_name = font_manager.FontProperties( + fname=os.path.join(BASE_DIR, "_data", "NanumSquareL.ttf") +).get_name() + def main(): for folder_name in ["지선-당선", "지선-후보"]: @@ -18,7 +22,9 @@ def main(): # folder_name = input("_data 내의 폴더 이름은 무엇인가요?") # cluster_by = input("구역을 나눌 기준을 입력해주세요 (sdName 즉 시/도 또는 wiwName 즉 기초단체단위): ") datadir = os.path.join(BASE_DIR, "_data", folder_name) - outdir = os.path.join(BASE_DIR, "output", f"age_all_{cluster_by}", folder_name) + outdir = os.path.join( + BASE_DIR, "output", f"age_all_{cluster_by}", folder_name + ) for d in os.listdir(datadir): # xlsx 파일을 읽어옵니다. @@ -31,7 +37,12 @@ def main(): df = df.sort_values(by="age") year = d[7:11] # most_common_age_group(df, year) - cluster(df, year, 7, 'kmeans', cluster_by, outdir, font_name, folder_name) - cluster(df, year, 7, 'equal', cluster_by, outdir, font_name, folder_name) + cluster( + df, year, 7, "kmeans", cluster_by, outdir, font_name, folder_name + ) + cluster( + df, year, 7, "equal", cluster_by, outdir, font_name, folder_name + ) + main() diff --git a/analysis/age/most_common_age_group.py b/analysis/age/most_common_age_group.py index bafead3..3175861 100644 --- a/analysis/age/most_common_age_group.py +++ b/analysis/age/most_common_age_group.py @@ -1,6 +1,7 @@ # coding=utf-8 import pandas as pd + def most_common_age_group(df, d): """10년단위로 무리짓고 가장 사람 많은 무리 출력. 
     df: 데이터프레임

From a3648d634788a87a0eb1592ae256caa887679978 Mon Sep 17 00:00:00 2001
From: Re-st
Date: Wed, 8 Nov 2023 20:30:32 +0900
Subject: [PATCH 08/12] =?UTF-8?q?[scrap]=20council=5Fid=20=EC=A0=95?=
 =?UTF-8?q?=EC=88=98=EB=A1=9C=20=EB=B3=80=ED=99=98,=20import=20=5F=5Finit?=
 =?UTF-8?q?=5F=5F=EA=B3=BC=20basic=EC=97=90=20=EB=AA=B0=EC=95=84=EC=A3=BC?=
 =?UTF-8?q?=EA=B8=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scrap/local_councils/__init__.py    |  17 +-
 scrap/local_councils/basic.py       |  14 +-
 scrap/local_councils/busan.py       | 242 +++--------
 scrap/local_councils/chungcheong.py |  74 +---
 scrap/local_councils/daegu.py       | 117 ++----
 scrap/local_councils/daejeon.py     |  73 +---
 scrap/local_councils/gangwon.py     | 170 ++------
 scrap/local_councils/gwangju.py     |  44 +-
 scrap/local_councils/gyeonggi.py    |  44 +-
 scrap/local_councils/incheon.py     | 106 ++---
 scrap/local_councils/jeolla.py      | 150 ++-----
 scrap/local_councils/seoul.py       | 327 ++++-----------
 scrap/local_councils/ulsan.py       |  82 +---
 scrap/utils/scrap_args.json         | 611 ++++++++++++++++++++++++++++
 scrap/utils/spreadsheet.py          | 577 ++-------------------------
 scrap/utils/types.py                |   2 +-
 16 files changed, 970 insertions(+), 1680 deletions(-)
 create mode 100644 scrap/utils/scrap_args.json

diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py
index a4e1fc8..9c6b97e 100644
--- a/scrap/local_councils/__init__.py
+++ b/scrap/local_councils/__init__.py
@@ -2,6 +2,17 @@
 각 기초의회들의 크롤링 코드를 모아놓은 패키지입니다.
 광역자치단체 별로 폴더를 만들어서 관리합니다.
 """
-from .daejeon import *
-from .ulsan import *
-from .basic import *
+import re
+from urllib.parse import urlparse
+from typing import List
+from scrap.utils.types import Councilor, ScrapResult, ScrapBasicArgument
+from scrap.utils.requests import get_soup
+from scrap.utils.types import CouncilType
+from scrap.utils.utils import getPartyList
+
+def returncouncilors(cid, councilors):
+    return ScrapResult(
+        council_id=cid,
+        council_type=CouncilType.LOCAL_COUNCIL,
+        councilors=councilors,
+    )
\ No newline at end of file
diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py
index b71e162..7354217 100644
--- a/scrap/local_councils/basic.py
+++ b/scrap/local_councils/basic.py
@@ -1,8 +1,4 @@
-from urllib.parse import urlparse
-
-from scrap.utils.types import CouncilType, Councilor, ScrapResult, ScrapBasicArgument
-from scrap.utils.requests import get_soup
-from scrap.utils.utils import getPartyList
+from scrap.local_councils import *
 import re
 import requests
 import copy
@@ -11,7 +7,6 @@
 party_keywords = getPartyList()
 party_keywords.append("무소속")
 
-
 def find(soup, element, class_):
     if class_ is None:
         return soup.find(element)
@@ -146,7 +141,6 @@ def get_party_easy(profile, wrapper_element, wrapper_class_, wrapper_txt, url):
     assert party is not None
     return party
 
-
 def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapResult:
     """의원 상세약력 스크랩
     :param url: 의원 목록 사이트 url
@@ -193,11 +187,7 @@ def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapRe
             raise RuntimeError("[basic.py] 의원 정당을 가져오는데 실패했습니다. 이유: " + str(e))
         councilors.append(Councilor(name=name, party=party))
 
-    return ScrapResult(
-        council_id=str(cid),
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors,
-    )
+    return returncouncilors(cid, councilors)
 
 
 if __name__ == "__main__":
diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py
index af8987a..42b6159 100644
--- a/scrap/local_councils/busan.py
+++ b/scrap/local_councils/busan.py
@@ -1,18 +1,13 @@
 import os
 
-from scrap.utils.types import CouncilType, Councilor, ScrapResult
-from scrap.utils.requests import get_soup, get_selenium, By
-
-
+from scrap.utils.requests import get_selenium, By
+from scrap.local_councils import *
+from scrap.local_councils.basic import returncouncilors
 
 def scrap_26(
-    url="https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil",
+    url, cid
 ) -> ScrapResult:
-    """부산시 중구 페이지에서 의원 상세약력 스크랩
-
-    :param url: 의원 목록 사이트 url
-    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    """
+    """부산 중구"""
     soup = get_soup(url, verify=False)
     councilors: list[Councilor] = []
 
@@ -31,21 +26,13 @@ def scrap_26(
 
         councilors.append(Councilor(name=name, party=party))
 
-    return ScrapResult(
-        council_id="busan-junggu",
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors,
-    )
-
+    return returncouncilors(cid, councilors)
 
 
 def scrap_27(
-    url="https://www.bsseogu.go.kr/council/board/list.bsseogu?boardId=BBS_0000097&categoryCode1=8&menuCd=DOM_000000603001000000&contentsSid=785&cpath=%2Fcouncil",
+    url,
+    cid
 ) -> ScrapResult:
-    """부산시 서구 페이지에서 의원 상세약력 스크랩
-
-    :param url: 의원 목록 사이트 url
-    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    """
+    """부산 서구"""
     soup = get_soup(url, verify=False)
     councilors: list[Councilor] = []
 
@@ -70,21 +57,13 @@ def scrap_27(
 
         councilors.append(Councilor(name=name, party=party))
 
-    return ScrapResult(
-        council_id="busan-seogu",
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors,
-    )
+    return returncouncilors(cid, councilors)
 
 
 def scrap_28(
-    url="https://www.bsdonggu.go.kr/council/index.donggu?menuCd=DOM_000000502004000000",
+    url, cid
 ) -> ScrapResult:
-    """부산시 동구 페이지에서 의원 상세약력 스크랩
-
-    :param url: 의원 목록 사이트 url
-    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    """
+    """부산 동구"""
     soup = get_soup(url, verify=False)
     councilors: list[Councilor] = []
 
@@ -99,19 +78,11 @@ def scrap_28(
 
         councilors.append(Councilor(name=name, party=party))
 
-    return ScrapResult(
-        council_id="busan-donggu",
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors,
-    )
-
+    return returncouncilors(cid, councilors)
 
 
-def scrap_29(url="https://www.yeongdo.go.kr/council/01211/01212.web") -> ScrapResult:
-    """부산시 영도구 페이지에서 의원 상세약력 스크랩
-    :param url: 의원 목록 사이트 url
-    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    """
+def scrap_29(url, cid) -> ScrapResult:
+    """부산 영도구"""
     soup = get_soup(url, verify=False)
     councilors: list[Councilor] = []
 
@@ -128,21 +99,13 @@ def scrap_29(url="https://www.yeongdo.go.kr/council/01211/01212.web") -> ScrapRe
 
         councilors.append(Councilor(name=name, party=party))
 
-    return ScrapResult(
-        council_id="busan-yeongdogu",
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors,
-    )
+    return returncouncilors(cid, councilors)
 
 
 def scrap_30(
-    url="https://council.busanjin.go.kr/content/member/member.html",
+    url, cid
 ) -> ScrapResult:
-    """부산시 부산진구 페이지에서 의원 상세약력 스크랩
-
-    :param url: 의원 목록 사이트 url
-    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    """
+    """부산 부산진구"""
    soup = get_soup(url, verify=False).find("ul", class_="mlist")
    councilors: 
list[Councilor] = [] @@ -159,21 +122,13 @@ def scrap_30( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-busanjingu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_31( - url="http://council.dongnae.go.kr/source/kr/member/active.html", + url, cid ) -> ScrapResult: - """부산시 동래구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """부산 동래구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -187,19 +142,11 @@ def scrap_31( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-dongnaegu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_32(url="https://council.bsnamgu.go.kr/kr/member/active") -> ScrapResult: - """부산시 남구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +def scrap_32(url, cid) -> ScrapResult: + """부산 남구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -219,21 +166,12 @@ def scrap_32(url="https://council.bsnamgu.go.kr/kr/member/active") -> ScrapResul councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-namgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_33( - url="https://www.bsbukgu.go.kr/council/index.bsbukgu?menuCd=DOM_000000808001001000", -) -> ScrapResult: - """부산시 북구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 북구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -248,21 +186,12 @@ def scrap_33( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-bukgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_34( - url="https://council.haeundae.go.kr/board/list.do?boardId=BBS_0000096&categoryCode1=08&menuCd=DOM_000000702001001000&contentsSid=330", -) -> ScrapResult: - """부산시 해운대구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 해운대구""" soup = get_soup(url, verify=False).find("div", class_="initial_list") councilors: list[Councilor] = [] @@ -285,21 +214,12 @@ def scrap_34( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-haeundaegu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_35( - url="https://council.gijang.go.kr/source/korean/member/active.html", -) -> ScrapResult: - """부산시 기장군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 기장군""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -316,21 +236,12 @@ def scrap_35( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-gijanggun", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_36( - url="https://www.saha.go.kr/council/congressMember/list03.do?mId=0403000000", -) -> ScrapResult: - """부산시 사하구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 
url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 사하구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -345,21 +256,12 @@ def scrap_36( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-sahagu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_37( - url="https://council.geumjeong.go.kr/index.geumj?menuCd=DOM_000000716001000000", -) -> ScrapResult: - """부산시 금정구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 금정구""" soup = get_soup(url, verify=False).find("div", class_="council_list") councilors: list[Councilor] = [] @@ -376,21 +278,12 @@ def scrap_37( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-geumjeonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_38( - url="https://www.bsgangseo.go.kr/council/contents.do?mId=0203000000", -) -> ScrapResult: - """부산시 강서구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 강서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -409,21 +302,12 @@ def scrap_38( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-gangseogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_39( - url="https://www.yeonje.go.kr/council/assemblyIntro/list.do?mId=0201000000", -) -> ScrapResult: - """부산시 연제구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 연제구""" councilors: list[Councilor] = [] browser = get_selenium(url) @@ -450,23 +334,13 @@ def scrap_39( browser.switch_to.window(cur_win) councilors.append(Councilor(name, party)) - councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="busan-yeonjegu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_40( - url="https://www.suyeong.go.kr/council/index.suyeong?menuCd=DOM_000001402001001000&link=success&cpath=%2Fcouncil", -) -> ScrapResult: - """부산시 수영구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """부산 수영구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -481,21 +355,13 @@ def scrap_40( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-suyeonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_41( - url="https://www.sasang.go.kr/council/index.sasang?menuCd=DOM_000000202005000000", + url, cid ) -> ScrapResult: - """부산시 사상구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """부산 사상구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -515,11 +381,7 @@ def scrap_41( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="busan-sasanggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git 
a/scrap/local_councils/chungcheong.py b/scrap/local_councils/chungcheong.py index 87407c3..caf8323 100644 --- a/scrap/local_councils/chungcheong.py +++ b/scrap/local_councils/chungcheong.py @@ -1,12 +1,8 @@ -from urllib.parse import urlparse - -from scrap.utils.types import CouncilType, Councilor, ScrapResult, ScrapBasicArgument -from scrap.utils.requests import get_soup +from scrap.local_councils import * from scrap.local_councils.basic import * - def scrap_124( - url="https://council.cheongju.go.kr/content/member/member.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 청주시 페이지에서 의원 상세약력 스크랩 @@ -32,16 +28,11 @@ def scrap_124( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(124), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_125( - url="https://council.chungju.go.kr/content/member/memberName.html", - args: ScrapBasicArgument = None, +url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 충주시 페이지에서 의원 상세약력 스크랩 @@ -65,16 +56,11 @@ def scrap_125( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(125), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_126( - url="https://www.jecheon.go.kr/council/lawmakerList.do?key=4393", - args: ScrapBasicArgument = None, +url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 제천시 페이지에서 의원 상세약력 스크랩 @@ -97,16 +83,11 @@ def scrap_126( name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(126), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_132( - url="https://council.jincheon.go.kr/council/lawmakerList.do?key=70", - args: ScrapBasicArgument = None, +url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 제천시 페이지에서 의원 상세약력 스크랩 @@ -130,16 +111,11 @@ def scrap_132( name = name_tag.get_text(strip=True).split()[0] # 김철수 의원 -> 김철수 councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(132), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_134( - url="https://council.jp.go.kr/source/korean/member/active.html", - args: ScrapBasicArgument = None, +url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 증평군 페이지에서 의원 상세약력 스크랩 @@ -164,16 +140,11 @@ def scrap_134( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(134), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_140( - url="https://council.taean.go.kr/main/index.php?m_cd=11", - args: ScrapBasicArgument = None, +url, cid, args: ScrapBasicArgument = None ) -> ScrapResult: """충청남도 태안군 페이지에서 의원 상세약력 스크랩 @@ -194,22 +165,13 @@ def scrap_140( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(140), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_142( - url="https://www.nonsancl.go.kr/kr/member/active", - args: ScrapBasicArgument = None, +url, cid, args ) -> ScrapResult: - """충청남도 논산시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """충청남도 
논산시""" base_url = "https://www.nonsancl.go.kr/kr/member/profile_popup?uid=" soup = get_soup(url) councilors: list[Councilor] = [] @@ -230,11 +192,7 @@ def scrap_142( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(142), - council_type=CouncilType.LOCAL_COUNCIL.value, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/daegu.py b/scrap/local_councils/daegu.py index f565e11..4ed28cb 100644 --- a/scrap/local_councils/daegu.py +++ b/scrap/local_councils/daegu.py @@ -1,17 +1,8 @@ -from urllib.parse import urlparse - -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - +from scrap.local_councils import * def scrap_42( - url="https://junggucouncil.daegu.kr/source/main03/main01.html?d_th=8", -) -> ScrapResult: - """대전시 중구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +url, cid) -> ScrapResult: + """대구 중구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -28,20 +19,12 @@ def scrap_42( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-junggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_43( - url="https://www.donggucl.daegu.kr/content/member/member.html", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +url, cid) -> ScrapResult: + """대구 동구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -69,18 +52,11 @@ def scrap_43( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-donggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_44(url="https://www.dgscouncil.go.kr/kr/member/active") -> ScrapResult: - """대전시 서구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_44(url, cid) -> ScrapResult: + """대구 서구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -100,20 +76,12 @@ def scrap_44(url="https://www.dgscouncil.go.kr/kr/member/active") -> ScrapResult councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-seogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_45( - url="https://nam.daegu.kr/council/index.do?menu_id=00000548", -) -> ScrapResult: - """대전시 남구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +url, cid) -> ScrapResult: + """대구 남구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -131,18 +99,11 @@ def scrap_45( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-namgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_46(url="https://bukgucouncil.daegu.kr/kr/member/name.do") -> ScrapResult: - """대전시 북구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_46(url, cid) -> ScrapResult: + """대구 북구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -160,20 +121,12 @@ def 
scrap_46(url="https://bukgucouncil.daegu.kr/kr/member/name.do") -> ScrapResu councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-bukgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_47( - url="https://suseongcouncil.suseong.kr/ss_council/content/?pos=active&me_code=2010", -) -> ScrapResult: - """대전시 수성구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +url, cid) -> ScrapResult: + """대구 수성구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -189,20 +142,12 @@ def scrap_47( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-suseonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_48( - url="https://www.dalseocouncil.daegu.kr/content/member/member.html", -) -> ScrapResult: - """대전시 달서구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +url, cid) -> ScrapResult: + """대구 달서구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -221,20 +166,12 @@ def scrap_48( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-dalseogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_49( - url="https://council.dalseong.go.kr/content/member/member.html", -) -> ScrapResult: - """대전시 달성군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +url, cid) -> ScrapResult: + """대구 달성군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -266,11 +203,7 @@ def scrap_49( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-dalseonggun", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/daejeon.py b/scrap/local_councils/daejeon.py index 091a130..b0ae78b 100644 --- a/scrap/local_councils/daejeon.py +++ b/scrap/local_councils/daejeon.py @@ -1,16 +1,7 @@ -from urllib.parse import urlparse +from scrap.local_councils import * -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_65(url="https://council.donggu.go.kr/kr/member/active") -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_65(url, cid) -> ScrapResult: + """대전 동구 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -40,18 +31,11 @@ def scrap_65(url="https://council.donggu.go.kr/kr/member/active") -> ScrapResult councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-donggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_66(url="https://council.djjunggu.go.kr/kr/member/name.do") -> ScrapResult: - """대전시 중구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_66(url, cid) -> ScrapResult: + """대전 중구 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -66,20 +50,13 @@ def scrap_66(url="https://council.djjunggu.go.kr/kr/member/name.do") -> ScrapRes 
party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-junggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_67( - url="https://www.seogucouncil.daejeon.kr/svc/mbr/MbrPresent.do", + url, cid, ) -> ScrapResult: - """대전시 서구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """대전 서구 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -97,18 +74,11 @@ def scrap_67( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-seogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_68(url="https://yuseonggucouncil.go.kr/page/page02_01_01.php") -> ScrapResult: - """대전시 유성구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_68(url, cid) -> ScrapResult: + """대전 유성구 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -125,18 +95,11 @@ def scrap_68(url="https://yuseonggucouncil.go.kr/page/page02_01_01.php") -> Scra party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-yuseonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_69(url="https://council.daedeok.go.kr/kr/member/name.do") -> ScrapResult: - """대전시 대덕구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_69(url, cid) -> ScrapResult: + """대전 대덕구 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -152,11 +115,7 @@ def scrap_69(url="https://council.daedeok.go.kr/kr/member/name.do") -> ScrapResu party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="daejeon-daedeokgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 326920f..ed2b29f 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -4,7 +4,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options -from scrap.utils.types import CouncilType, Councilor, ScrapResult, ScrapBasicArgument +from scrap.utils.types import Councilor, ScrapResult, ScrapBasicArgument from scrap.utils.requests import get_soup from scrap.local_councils.basic import * from scrap.utils.utils import getPartyList @@ -14,12 +14,9 @@ def scrap_107( - url="https://council.wonju.go.kr/content/member/memberName.html", + url, cid, ) -> ScrapResult: - """강원도 원주시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 원주시 """ councilors: list[Councilor] = [] @@ -55,28 +52,13 @@ def scrap_107( councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="107", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -# 107: ScrapBasicArgument( -# pf_memlistelt="div", -# pf_memlistcls="content", -# pf_elt="dl", -# name_elt="dd", -# name_cls="name", -# pty_elt="span", -# ), + return returncouncilors(cid, councilors) + + def scrap_113( - 
url="https://sokchocl.go.kr/kr/member/active.do", args: ScrapBasicArgument = None + url, cid, args: ScrapBasicArgument = None ) -> ScrapResult: - """강원도 속초시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 속초시 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -94,21 +76,14 @@ def scrap_113( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(113), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_114( - url="https://council.gwgs.go.kr/Home/H20000/H20100/membProfileActiveList", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 고성군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 고성군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -128,21 +103,14 @@ def scrap_114( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(114), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_115( - url="https://www.yangyangcouncil.go.kr/kr/member/name.do", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 양양군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 양양군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -160,21 +128,14 @@ def scrap_115( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(115), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_116( - url="https://www.injecl.go.kr/content/members/memberName.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 인제군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 인제군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -188,21 +149,14 @@ def scrap_116( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(116), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_117( - url="https://www.hccouncil.go.kr/source/korean/member/active.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 홍천군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 홍천군 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -221,21 +175,14 @@ def scrap_117( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(117), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_118( - url="https://www.hsg.go.kr/council/contents.do?key=1423&", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 횡성군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 횡성군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -249,21 +196,14 @@ def scrap_118( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(118), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_119( - 
url="https://council.yw.go.kr/content/member/active.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 영월군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 영월군 """ base_url = "https://council.yw.go.kr" soup = get_soup(url) @@ -289,21 +229,14 @@ def scrap_119( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(119), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_120( - url="https://cl.happy700.or.kr/kr/member/active.do", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 평창군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 평창군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -319,21 +252,14 @@ def scrap_120( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(120), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_121( - url="http://council.ihc.go.kr/bbs/content.php?co_id=sub03_2", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 화천군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 화천군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -347,21 +273,14 @@ def scrap_121( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(121), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_122( - url="http://www.ygcl.go.kr/portal/F20000/F20100/html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 양구군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 양구군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -375,21 +294,14 @@ def scrap_122( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(122), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_123( - url="https://council.cwg.go.kr/council/contents.do?key=507", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 철원군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """강원도 철원군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -406,11 +318,7 @@ def scrap_123( # TODO councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(123), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index b0c635c..aa3c15b 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -5,21 +5,15 @@ from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options - -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.utils import getPartyList - +from scrap.local_councils import * party_keywords = getPartyList() party_keywords.append("무소속") def scrap_62( - url="http://www.gjnc.or.kr/main/contents/lawmakerDistrict", + url, 
cid ) -> ScrapResult: - """광주시 서구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """광주 서구 """ councilors: list[Councilor] = [] @@ -60,20 +54,13 @@ def scrap_62( councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="62", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_63( - url="https://council.bukgu.gwangju.kr/index.do?PID=024", + url, cid ) -> ScrapResult: - """광주시 북구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """광주 북구 """ councilors: list[Councilor] = [] @@ -106,20 +93,13 @@ def scrap_63( councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="63", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_64( - url="https://gjgc.or.kr/main/contents/lawmaker", + url, cid ) -> ScrapResult: - """광주시 광산구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """광주 광산구 """ councilors: list[Councilor] = [] @@ -155,8 +135,4 @@ def scrap_64( councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="64", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) diff --git a/scrap/local_councils/gyeonggi.py b/scrap/local_councils/gyeonggi.py index 8d22ab0..4641ea4 100644 --- a/scrap/local_councils/gyeonggi.py +++ b/scrap/local_councils/gyeonggi.py @@ -1,12 +1,9 @@ """경기도를 스크랩. """ -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -from scrap.local_councils.basic import * +from scrap.local_councils import * +from scrap.local_councils.basic import find, regex_pattern, find_all, extract_party, get_name, get_party_easy - -def get_profiles_88(soup, element, class_, memberlistelement, memberlistclass_): - # 의원 목록 사이트에서 의원 프로필을 가져옴 +def get_profiles_88_103(soup, element, class_, memberlistelement, memberlistclass_): if memberlistelement is not None: try: soup = soup.find_all(memberlistelement, id=memberlistclass_)[0] @@ -16,7 +13,6 @@ def get_profiles_88(soup, element, class_, memberlistelement, memberlistclass_): def get_party_88(profile, element, class_, wrapper_element, wrapper_class_, url): - # 의원 프로필에서 의원이 몸담는 정당 이름을 가져옴 if wrapper_element is not None: parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" @@ -42,18 +38,11 @@ def get_party_88(profile, element, class_, wrapper_element, wrapper_class_, url) return "[basic.py] 정당 정보 파싱 불가" -def scrap_88(url, args: ScrapBasicArgument) -> ScrapResult: - """의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :param args: ScrapBasicArgument 객체 - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - cid = 88 +def scrap_88(url, cid, args: ScrapBasicArgument) -> ScrapResult: encoding = "euc-kr" soup = get_soup(url, verify=False, encoding=encoding) councilors: list[Councilor] = [] - party_in_main_page = any(keyword in soup.text for keyword in party_keywords) - profiles = get_profiles_88( + profiles = get_profiles_88_103( soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls ) print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # 디버깅용. 
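# A minimal sketch of how the unified scrap_<cid>(url, cid, args) signatures in
# this patch can be driven from the new scrap/utils/scrap_args.json. The JSON
# shape and the run_one() helper below are assumptions for illustration;
# scrap_basic(url, cid, args) and ScrapBasicArgument are the names actually
# used in basic.py above.
import json

from scrap.utils.types import ScrapBasicArgument
from scrap.local_councils.basic import scrap_basic

with open("scrap/utils/scrap_args.json", encoding="utf-8") as f:
    raw = json.load(f)  # assumed shape: {"124": {"pf_elt": "div", ...}, ...}

# One ScrapBasicArgument per council id, built from the JSON keyword fields.
args_table = {int(cid): ScrapBasicArgument(**kw) for cid, kw in raw.items()}

def run_one(cid: int, url: str):
    # Councils without a bespoke scrap_<cid> function fall back to the generic
    # scrap_basic path; both now build their result via returncouncilors(cid, ...).
    return scrap_basic(url, cid, args_table[cid])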
@@ -79,15 +68,10 @@ def scrap_88(url, args: ScrapBasicArgument) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(cid), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def get_party_103(profile, element, class_, wrapper_element, wrapper_class_, url): - # 의원 프로필에서 의원이 몸담는 정당 이름을 가져옴 if wrapper_element is not None: parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" @@ -113,17 +97,11 @@ def get_party_103(profile, element, class_, wrapper_element, wrapper_class_, url return "[basic.py] 정당 정보 파싱 불가" -def scrap_103(url, args: ScrapBasicArgument) -> ScrapResult: - """의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :param args: ScrapBasicArgument 객체 - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +def scrap_103(url, cid, args: ScrapBasicArgument) -> ScrapResult: cid = 103 soup = get_soup(url, verify=False) councilors: list[Councilor] = [] - party_in_main_page = any(keyword in soup.text for keyword in party_keywords) - profiles = get_profiles_88( + profiles = get_profiles_88_103( soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls ) print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # 디버깅용. @@ -138,8 +116,4 @@ def scrap_103(url, args: ScrapBasicArgument) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(cid), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index 5e015ae..c5b49a8 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -8,13 +8,10 @@ find, extract_party, ) +from scrap.local_councils import * - -def scrap_50(url="https://www.icjg.go.kr/council/cnmi0101c") -> ScrapResult: - """인천시 중구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_50(url, cid) -> ScrapResult: + """인천 중구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -26,20 +23,13 @@ def scrap_50(url="https://www.icjg.go.kr/council/cnmi0101c") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="incheon-junggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_51(url="https://council.icdonggu.go.kr/korean/member/active") -> ScrapResult: - """인천시 동구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_51(url, cid) -> ScrapResult: + """인천 동구 """ - raise Exception("현재 인천시 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") + raise Exception("현재 인천 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") # soup = get_soup(url, verify=False) # councilors: list[Councilor] = [] @@ -64,20 +54,13 @@ def scrap_51(url="https://council.icdonggu.go.kr/korean/member/active") -> Scrap # councilors.append(Councilor(name=name, party=party)) -# return ScrapResult( -# council_id="incheon-donggu", -# council_type=CouncilType.LOCAL_COUNCIL, -# councilors=councilors -# ) +# return returncouncilors(cid, councilors) def scrap_52( - url="https://www.michuhol.go.kr/council/introduction/career.asp", + url, cid ) -> ScrapResult: - """인천시 미추홀구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """인천 미추홀구 """ councilors: list[Councilor] = [] @@ -98,18 +81,11 @@ def 
scrap_52( councilors.append(Councilor(name, party)) - return ScrapResult( - council_id="incheon-michuholgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_53(url="https://council.yeonsu.go.kr/kr/member/name.do") -> ScrapResult: - """인천시 연수구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_53(url, cid) -> ScrapResult: + """인천 연수구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -127,18 +103,11 @@ def scrap_53(url="https://council.yeonsu.go.kr/kr/member/name.do") -> ScrapResul councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="incheon-yeonsugu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_54(url="https://council.namdong.go.kr/kr/member/active.do") -> ScrapResult: - """인천시 남동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_54(url, cid) -> ScrapResult: + """인천 남동구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -154,20 +123,13 @@ def scrap_54(url="https://council.namdong.go.kr/kr/member/active.do") -> ScrapRe councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="incheon-namdonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_55(url="https://council.icbp.go.kr/kr/member/active") -> ScrapResult: - """인천시 부평구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_55(url, cid) -> ScrapResult: + """인천 부평구 """ - raise Exception("현재 인천시 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") + raise Exception("현재 인천 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") # soup = get_soup(url, verify=False) # councilors: list[Councilor] = [] @@ -183,20 +145,17 @@ def scrap_55(url="https://council.icbp.go.kr/kr/member/active") -> ScrapResult: # councilors.append(Councilor(name=name, party=party)) - # return ScrapResult( - # council_id="incheon-bupyeonggu", + # return returncouncilors(cid, councilors) + # council_id=55, # council_type=CouncilType.LOCAL_COUNCIL, # councilors=councilors # ) def scrap_56( - url="https://www.gyeyang.go.kr/open_content/council/member/present/present.jsp", + url, cid ) -> ScrapResult: - """인천시 계양구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """인천 계양구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -213,18 +172,11 @@ def scrap_56( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="incheon-gyeyanggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_57(url, args) -> ScrapResult: - """인천시 서구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """인천 서구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -257,11 +209,7 @@ def scrap_57(url, args) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(cid), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/jeolla.py b/scrap/local_councils/jeolla.py index f419260..86ef7eb 100644 --- 
a/scrap/local_councils/jeolla.py +++ b/scrap/local_councils/jeolla.py @@ -1,18 +1,10 @@ -from urllib.parse import urlparse - -from scrap.utils.types import CouncilType, Councilor, ScrapResult, ScrapBasicArgument -from scrap.utils.requests import get_soup -from scrap.local_councils.basic import * - +from scrap.local_councils import * def scrap_154( - url="https://council.namwon.go.kr/member/member.php", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 남원시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 남원시 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -26,21 +18,14 @@ def scrap_154( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(154), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_155( - url="https://council.gimje.go.kr/index.gimje?menuCd=DOM_000000102001001000", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 김제시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 김제시 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -54,21 +39,14 @@ def scrap_155( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(155), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_156( - url="https://council.wanju.go.kr/board?depth_1=10&depth_2=33", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 완주군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 완주군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -83,21 +61,14 @@ def scrap_156( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(156), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_157( - url="https://council.jinan.go.kr/main2011/member/active.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 진안군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 진안군 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -111,59 +82,34 @@ def scrap_157( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(157), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_160( - url="https://council.imsil.go.kr/main/contents/lawmakerDistrict", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 임실군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 임실군 """ # TODO: js로 동적으로 읽어옴 raise NotImplementedError - return ScrapResult( - council_id=str(160), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=[], - ) - def scrap_161( - url="https://www.sunchangcouncil.go.kr/main/contents/lawmaker", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 순창군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 순창군 """ # TODO: js로 동적으로 읽어옴 raise NotImplementedError - return ScrapResult( - council_id=str(161), - council_type=CouncilType.LOCAL_COUNCIL, - 
councilors=[], - ) - def scrap_162( - url="https://www.gochang.go.kr/council/index.gochang?menuCd=DOM_000000603005000000", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 고창군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 고창군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -177,21 +123,14 @@ def scrap_162( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(157), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_163( - url="https://council.buan.go.kr/index.buan?menuCd=DOM_000000104001002000", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 부안군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 부안군 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -211,21 +150,14 @@ def scrap_163( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(163), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_164( - url="https://council.mokpo.go.kr/kr/member/active", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라남도 목포시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라남도 목포시 """ base_url = "https://council.mokpo.go.kr/" soup = get_soup(url, verify=False) @@ -246,21 +178,14 @@ def scrap_164( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(164), - council_type=CouncilType.LOCAL_COUNCIL.value, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_165( - url="https://council.yeosu.go.kr/source/korean/member/active.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라남도 여수시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라남도 여수시 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -275,21 +200,14 @@ def scrap_165( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(165), - council_type=CouncilType.LOCAL_COUNCIL.value, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_167( - url="https://council.naju.go.kr/source/korean/member/active.html", + url, cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 나주시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """전라북도 나주시 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -303,11 +221,7 @@ def scrap_167( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id=str(167), - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/seoul.py b/scrap/local_councils/seoul.py index e530aad..33fc2e4 100644 --- a/scrap/local_councils/seoul.py +++ b/scrap/local_councils/seoul.py @@ -5,15 +5,12 @@ from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.requests import get_soup - +from scrap.local_councils import * def scrap_1( - 
url="https://bookcouncil.jongno.go.kr/record/recordView.do?key=99784f935fce5c1d7c8c08c2f9e35dda1c0a6128428ecb1a87f87ee2b4e82890ffcf12563e01473f", + url, cid, ) -> ScrapResult: - """서울시 종로구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """서울 종로구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -29,18 +26,11 @@ def scrap_1( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-jongno", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_2(url="https://02jgnew.council.or.kr/kr/member/active") -> ScrapResult: - """서울시 중구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_2(url, cid) -> ScrapResult: + """서울 중구 """ parliment_soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -66,18 +56,11 @@ def scrap_2(url="https://02jgnew.council.or.kr/kr/member/active") -> ScrapResult councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-junggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_3(url="https://www.yscl.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 용산구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_3(url, cid) -> ScrapResult: + """서울 용산구 """ soup = get_soup(url, verify=False) @@ -94,18 +77,11 @@ def scrap_3(url="https://www.yscl.go.kr/kr/member/name.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-yongsangu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_4(url="https://sdcouncil.sd.go.kr/kr/member/active2") -> ScrapResult: - """서울시 성동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_4(url, cid) -> ScrapResult: + """서울 성동구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -121,18 +97,11 @@ def scrap_4(url="https://sdcouncil.sd.go.kr/kr/member/active2") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-seongdonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_5(url="https://council.gwangjin.go.kr/kr/member/active") -> ScrapResult: - """서울시 광진구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_5(url, cid) -> ScrapResult: + """서울 광진구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -150,18 +119,11 @@ def scrap_5(url="https://council.gwangjin.go.kr/kr/member/active") -> ScrapResul councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gwangjingu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_6(url="http://council.ddm.go.kr/citizen/menu1.asp") -> ScrapResult: - """서울시 동대문구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_6(url, cid) -> ScrapResult: + """서울 동대문구 """ parliment_soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -192,18 +154,11 @@ def scrap_6(url="http://council.ddm.go.kr/citizen/menu1.asp") -> 
ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-dongdaemungu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_7(url="https://council.jungnang.go.kr/kr/member/name2.do") -> ScrapResult: - """서울시 중랑구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_7(url, cid) -> ScrapResult: + """서울 중랑구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -219,18 +174,11 @@ def scrap_7(url="https://council.jungnang.go.kr/kr/member/name2.do") -> ScrapRes councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-jungnanggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_8(url="https://www.sbc.go.kr/kr/member/active.do") -> ScrapResult: - """서울시 성북구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_8(url, cid) -> ScrapResult: + """서울 성북구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -248,18 +196,11 @@ def scrap_8(url="https://www.sbc.go.kr/kr/member/active.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-seongbukgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_9(url="https://council.gangbuk.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 강북구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_9(url, cid) -> ScrapResult: + """서울 강북구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -279,20 +220,13 @@ def scrap_9(url="https://council.gangbuk.go.kr/kr/member/name.do") -> ScrapResul councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gangbukgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_10( - url="https://www.council-dobong.seoul.kr/kr/member/active.do", + url, cid, ) -> ScrapResult: - """서울시 도봉구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """서울 도봉구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -310,18 +244,11 @@ def scrap_10( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-dobonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_11(url="https://council.nowon.kr/kr/member/active.do") -> ScrapResult: - """서울시 노원구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_11(url, cid) -> ScrapResult: + """서울 노원구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -339,18 +266,11 @@ def scrap_11(url="https://council.nowon.kr/kr/member/active.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-nowongu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_12(url="https://council.ep.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 은평구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_12(url, cid) -> 
ScrapResult: + """서울 은평구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -370,20 +290,13 @@ def scrap_12(url="https://council.ep.go.kr/kr/member/name.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-eunpyeonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_13( - url="https://www.sdmcouncil.go.kr/source/korean/square/ascending.html", + url, cid, ) -> ScrapResult: - """서울시 서대문구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + """서울 서대문구 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -406,18 +319,11 @@ def scrap_13( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-seodaemungu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_14(url="https://council.mapo.seoul.kr/kr/member/active.do") -> ScrapResult: - """서울시 마포구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_14(url, cid) -> ScrapResult: + """서울 마포구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -433,18 +339,11 @@ def scrap_14(url="https://council.mapo.seoul.kr/kr/member/active.do") -> ScrapRe councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-mapogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_15(url="https://www.ycc.go.kr/kr/member/active") -> ScrapResult: - """서울시 양천구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_15(url, cid) -> ScrapResult: + """서울 양천구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -474,18 +373,11 @@ def scrap_15(url="https://www.ycc.go.kr/kr/member/active") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-yangcheongu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_16(url="https://gsc.gangseo.seoul.kr/member/org.asp") -> ScrapResult: - """서울시 강서구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_16(url, cid) -> ScrapResult: + """서울 강서구 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -507,18 +399,11 @@ def scrap_16(url="https://gsc.gangseo.seoul.kr/member/org.asp") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gangseogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_17(url="https://www.guroc.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 구로구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_17(url, cid) -> ScrapResult: + """서울 구로구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -538,18 +423,11 @@ def scrap_17(url="https://www.guroc.go.kr/kr/member/name.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gurogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, 
councilors) -def scrap_18(url="https://council.geumcheon.go.kr/member/member.asp") -> ScrapResult: - """서울시 금천구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_18(url, cid) -> ScrapResult: + """서울 금천구 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -567,18 +445,11 @@ def scrap_18(url="https://council.geumcheon.go.kr/member/member.asp") -> ScrapRe councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-geumcheongu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_19(url="https://www.ydpc.go.kr/kr/member/active.do") -> ScrapResult: - """서울시 영등포구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_19(url, cid) -> ScrapResult: + """서울 영등포구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -594,18 +465,11 @@ def scrap_19(url="https://www.ydpc.go.kr/kr/member/active.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-yeongdeungpogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_20(url="http://assembly.dongjak.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 동작구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_20(url, cid) -> ScrapResult: + """서울 동작구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -621,18 +485,11 @@ def scrap_20(url="http://assembly.dongjak.go.kr/kr/member/name.do") -> ScrapResu councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-dongjakgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_21(url="https://www.ga21c.seoul.kr/kr/member/name.do") -> ScrapResult: - """서울시 관악구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_21(url, cid) -> ScrapResult: + """서울 관악구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -650,18 +507,11 @@ def scrap_21(url="https://www.ga21c.seoul.kr/kr/member/name.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gwanakgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_22(url="https://www.sdc.seoul.kr/kr/member/active.do") -> ScrapResult: - """서울시 서초구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_22(url, cid) -> ScrapResult: + """서울 서초구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -679,18 +529,11 @@ def scrap_22(url="https://www.sdc.seoul.kr/kr/member/active.do") -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-seochogu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_23(url="https://www.gncouncil.go.kr/kr/member/name.do") -> ScrapResult: - """서울시 강남구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_23(url, cid) -> ScrapResult: + """서울 강남구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -710,18 
+553,11 @@ def scrap_23(url="https://www.gncouncil.go.kr/kr/member/name.do") -> ScrapResult councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gangnamgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_24(url="https://council.songpa.go.kr/kr/member/active.do") -> ScrapResult: - """서울시 송파구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_24(url, cid) -> ScrapResult: + """서울 송파구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -737,18 +573,11 @@ def scrap_24(url="https://council.songpa.go.kr/kr/member/active.do") -> ScrapRes councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-songpagu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_25(url="https://council.gangdong.go.kr/kr/member/active.do") -> ScrapResult: - """서울시 강동구 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_25(url, cid) -> ScrapResult: + """서울 강동구 """ soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -766,11 +595,7 @@ def scrap_25(url="https://council.gangdong.go.kr/kr/member/active.do") -> ScrapR councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="seoul-gangdonggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/ulsan.py b/scrap/local_councils/ulsan.py index 52bcf31..a349733 100644 --- a/scrap/local_councils/ulsan.py +++ b/scrap/local_councils/ulsan.py @@ -1,21 +1,10 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - -regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE) # Case-insensitive - +from scrap.local_councils import * +from scrap.local_councils.basic import regex_pattern def scrap_70( - url="https://council.junggu.ulsan.kr/content/member/memberName.html", + url, cid ) -> ScrapResult: - """울산시 중구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """울산 중구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -36,21 +25,13 @@ def scrap_70( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="ulsan-junggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_71( - url="https://www.namgucouncil.ulsan.kr/content/member/memberName.html", + url, cid ) -> ScrapResult: - """울산시 남구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """울산 남구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -73,21 +54,13 @@ def scrap_71( councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="ulsan-namgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) def scrap_72( - url="https://www.donggu-council.ulsan.kr/source/korean/member/active.html", + url, cid ) -> ScrapResult: - """울산시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ + """울산 동구""" soup = 
get_soup(url, verify=False, encoding="euc-kr") councilors: List[Councilor] = [] @@ -103,19 +76,11 @@ def scrap_72( party = party_info[0].get_text(strip=True).split(": ")[1] councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="ulsan-donggu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) -def scrap_73(url="https://council.bukgu.ulsan.kr/kr/member/active.do") -> ScrapResult: - """울산시 북구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +def scrap_73(url, cid) -> ScrapResult: + """울산 북구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -131,23 +96,14 @@ def scrap_73(url="https://council.bukgu.ulsan.kr/kr/member/active.do") -> ScrapR party = party_info[0].get_text(strip=True).split(": ")[1] councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="ulsan-bukgu", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - + return returncouncilors(cid, councilors) -def scrap_74(url="https://assembly.ulju.ulsan.kr/kr/member/active") -> ScrapResult: - """울산시 울주군 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ +def scrap_74(url, cid) -> ScrapResult: + """울산 울주군""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - # 프로필 링크 스크랩을 위해 base_url 추출 parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" @@ -167,11 +123,7 @@ def scrap_74(url="https://assembly.ulju.ulsan.kr/kr/member/active") -> ScrapResu councilors.append(Councilor(name=name, party=party)) - return ScrapResult( - council_id="ulsan_uljugun", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) + return returncouncilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/utils/scrap_args.json b/scrap/utils/scrap_args.json new file mode 100644 index 0000000..a02f39f --- /dev/null +++ b/scrap/utils/scrap_args.json @@ -0,0 +1,611 @@ +{ + "2": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "3": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "57": { + "pf_elt": "div", + "pf_cls": "conbox", + "name_elt": "p", + "name_cls": "name", + "pty_elt": "li" + }, + "58": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "59": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "em" + }, + "60": { + "pf_elt": "div", + "pf_cls": "content", + "name_elt": "h5", + "pty_elt": "li", + "pty_wrapelt": "a" + }, + "61": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "65": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "strong" + }, + "66": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "em" + }, + "67": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "member", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "68": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "69": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "70": { + "pf_elt": "dl", + "pf_memlistelt": 
"section", + "pf_memlistcls": "memberName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "71": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "memberName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "72": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "li", + "name_cls": "name", + "pty_elt": "li" + }, + "73": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "li" + }, + "74": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "start" + }, + "75": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "em" + }, + "76": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "77": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "mbrListByName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "78": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "end" + }, + "79": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "80": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "81": { + "pf_elt": "dd", + "pf_memlistelt": "div", + "pf_memlistcls": "member_list", + "name_elt": "p", + "pty_elt": "tr" + }, + "82": { + "pf_elt": "div", + "pf_cls": "conbox", + "pf_memlistelt": "div", + "pf_memlistcls": "cts1426_box", + "name_elt": "p", + "pty_elt": "li" + }, + "83": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "start" + }, + "84": { + "pf_elt": "div", + "pf_cls": "law_box", + "name_elt": "span", + "name_cls": "name", + "pty_elt": "p" + }, + "85": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "em" + }, + "86": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "87": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "88": { + "pf_elt": "dl", + "pf_cls": "box", + "pf_memlistelt": "div", + "pf_memlistcls": "member_list", + "name_elt": "span", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "p", + "pty_wrapcls": "btn" + }, + "89": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "memberName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span" + }, + "90": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "li" + }, + "91": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "mbr0101", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "92": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "member", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "93": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "end" + }, + "94": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "mbrListByName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "dd" + }, + "95": { + "pf_elt": "dl", + "pf_memlistelt": 
"section", + "pf_memlistcls": "member", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "tr" + }, + "96": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "em" + }, + "97": { + "pf_elt": "li", + "pf_memlistelt": "ul", + "pf_memlistcls": "memberList", + "name_elt": "strong", + "pty_elt": "tr", + "pty_wrapelt": "a" + }, + "98": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "99": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "100": { + "pf_elt": "div", + "pf_cls": "list", + "name_elt": "h4", + "name_cls": "h0", + "pty_elt": "li" + }, + "101": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "102": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "start" + }, + "103": { + "pf_elt": "div", + "pf_cls": "col-sm-6", + "name_elt": "h5", + "name_cls": "h5", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wrapcls": "d-inline-block" + }, + "104": { + "pf_elt": "div", + "pf_cls": "text_box", + "name_elt": "h3", + "name_cls": "h0", + "pty_elt": "li", + "pty_wrapelt": "a", + "pty_wraptxt": "\ub204\ub9ac\uc9d1" + }, + "105": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "108": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "pty_elt": "li" + }, + "109": { + "pf_elt": "dl", + "pf_memlistelt": "section", + "pf_memlistcls": "memberName", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span" + }, + "110": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "112": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "em" + }, + "113": { + "pf_elt": "div", + "pf_cls": "profile", + "name_cls": "name", + "pty_elt": "li" + }, + "115": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "li" + }, + "116": { + "pf_elt": "div", + "pf_cls": "memberName", + "name_cls": "name", + "pty_elt": "dd" + }, + "127": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "128": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "129": { + "pf_elt": "div", + "pf_cls": "right", + "pf_memlistelt": "ul", + "pf_memlistcls": "memberList", + "name_elt": "h5", + "pty_elt": "span" + }, + "130": { + "pf_elt": "div", + "pf_cls": "parliament_text", + "name_elt": "h3", + "name_cls": "h0", + "pty_elt": "span" + }, + "131": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "133": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "div", + "name_cls": "name", + "pty_elt": "span" + }, + "135": { + "pf_elt": "dl", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + }, + "136": { + "pf_elt": "div", + "pf_cls": "info", + "name_elt": "div", + "name_cls": "tit", + "pty_elt": "li" + }, + "137": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "138": { + "pf_elt": "div", + "pf_cls": "item", + "name_elt": "h5", + "name_cls": "name", + "pty_elt": "span" + }, + "139": { + "pf_elt": 
"ul", + "pf_cls": "assembly_list", + "name_elt": "div", + "name_cls": "names", + "pty_elt": "li" + }, + "141": { + "pf_elt": "dl", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + }, + "143": { + "pf_elt": "div", + "pf_cls": "card--body", + "name_elt": "strong", + "name_cls": "ui-list__title", + "pty_elt": "li" + }, + "144": { + "pf_elt": "dl", + "pf_memlistelt": "ul", + "pf_memlistcls": "mlist", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + }, + "145": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "146": { + "pf_elt": "div", + "pf_cls": "pt", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "li" + }, + "147": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "span" + }, + "148": { + "pf_elt": "div", + "pf_cls": "col", + "name_elt": "strong", + "name_cls": "ui-list__title", + "pty_elt": "li" + }, + "149": { + "pf_elt": "dl", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "span" + }, + "150": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "span" + }, + "151": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "152": { + "pf_elt": "div", + "pf_cls": "councillor_info", + "name_elt": "span", + "name_cls": "name", + "pty_elt": "li" + }, + "153": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "em", + "name_cls": "name", + "pty_elt": "span" + }, + "158": { + "pf_elt": "dl", + "pf_memlistelt": "ul", + "pf_memlistcls": "mlist", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + }, + "159": { + "pf_elt": "dl", + "pf_memlistelt": "ul", + "pf_memlistcls": "mlist", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + }, + "166": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "strong", + "pty_elt": "span" + }, + "168": { + "pf_elt": "dl", + "pf_memlistelt": "ul", + "pf_memlistcls": "mlist", + "name_elt": "dd", + "name_cls": "name", + "pty_elt": "span", + "pty_cls": "itemContent" + } +} \ No newline at end of file diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 602cb13..32d8534 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -1,12 +1,18 @@ -import os, sys +import os +import sys +import gspread +import json + from google.oauth2.credentials import Credentials from google.auth.transport.requests import Request from google_auth_oauthlib.flow import InstalledAppFlow -import gspread - from scrap.local_councils.seoul import * +from scrap.local_councils.busan import * +from scrap.local_councils.daegu import * from scrap.local_councils.incheon import * from scrap.local_councils.gwangju import * +from scrap.local_councils.daejeon import * +from scrap.local_councils.ulsan import * from scrap.local_councils.gyeonggi import * from scrap.local_councils.gangwon import * from scrap.local_councils.chungcheong import * @@ -17,8 +23,9 @@ # 구글로부터 권한을 요청할 어플리케이션 목록 # 변경 시 token.json 삭제 후 재인증 필요 SCOPES = ["https://www.googleapis.com/auth/spreadsheets"] -BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir) - +PWD = os.path.dirname(__file__) +BASE_DIR = os.path.join(PWD, os.pardir, os.pardir) +JSON_PATH = os.path.join(PWD, "scrap_args.json") def google_authorization(): """Google Sheets API 
활용을 위한 인증 정보 요청 @@ -67,551 +74,10 @@ def main() -> None: ) no_information = [106, 111] errors = [] - args = { - 2: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 3: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - # 인천 - 57: ScrapBasicArgument( - pf_elt="div", - pf_cls="conbox", - name_elt="p", - name_cls="name", - pty_elt="li", - ), - 58: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 59: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="em", - ), - # 광주 - 60: ScrapBasicArgument( - pf_elt="div", pf_cls="content", name_elt="h5", pty_wrapelt="a", pty_elt="li" - ), - 61: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - # 62 - 64 : gwangju.py - # 대전 - 65: ScrapBasicArgument( - pf_elt="dl", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="strong", - ), - 66: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="em", - ), - 67: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="member", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 68: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 69: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - # 울산 - 70: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="memberName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 71: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="memberName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 72: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="li", name_cls="name", pty_elt="li" - ), - 73: ScrapBasicArgument( - pf_elt="dl", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="li", - ), - 74: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_wrapelt="a", - pty_wrapcls="start", - pty_elt="li", - ), - # 경기 - 75: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="em", - ), - 76: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 77: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="mbrListByName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 78: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_wrapelt="a", - pty_wrapcls="end", - pty_elt="li", - ), - 79: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 80: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 81: ScrapBasicArgument( - pf_memlistelt="div", - pf_memlistcls="member_list", - pf_elt="dd", - name_elt="p", - pty_elt="tr", - ), - 82: ScrapBasicArgument( - pf_memlistelt="div", - pf_memlistcls="cts1426_box", - pf_elt="div", - pf_cls="conbox", - name_elt="p", - pty_elt="li", - ), - # 경기 - 동두천 - 83: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_wrapelt="a", - pty_wrapcls="start", - pty_elt="li", - ), - 84: ScrapBasicArgument( - pf_elt="div", - 
pf_cls="law_box", - name_elt="span", - name_cls="name", - pty_elt="p", - ), - 85: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="em", - ), - 86: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 87: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 88: ScrapBasicArgument( - pf_memlistelt="div", - pf_memlistcls="member_list", - pf_elt="dl", - pf_cls="box", - name_elt="span", - name_cls="name", - pty_wrapelt="p", - pty_wrapcls="btn", - pty_elt="li", - ), - 89: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="memberName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - ), - 90: ScrapBasicArgument( - pf_elt="dl", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="li", - ), - # 경기 - 화성 - 91: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="mbr0101", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 92: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="member", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 93: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_wrapelt="a", - pty_wrapcls="end", - pty_elt="li", - ), - 94: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="mbrListByName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="dd", - ), - 95: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="member", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="tr", - ), - 96: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="em", - ), - 97: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="memberList", - pf_elt="li", - name_elt="strong", - pty_wrapelt="a", - pty_elt="tr", - ), - 98: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 99: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 100: ScrapBasicArgument( - pf_elt="div", pf_cls="list", name_elt="h4", name_cls="h0", pty_elt="li" - ), - # 경기 - 광주 - 101: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 102: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_wrapelt="a", - pty_wrapcls="start", - pty_elt="li", - ), - 103: ScrapBasicArgument( - pf_elt="div", - pf_cls="col-sm-6", - name_elt="h5", - name_cls="h5", - pty_wrapelt="a", - pty_wrapcls="d-inline-block", - pty_elt="li", - ), - 104: ScrapBasicArgument( - pf_elt="div", - pf_cls="text_box", - name_elt="h3", - name_cls="h0", - pty_wrapelt="a", - pty_wraptxt="누리집", - pty_elt="li", - ), - 105: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - # 강원 - # 106 : 정당정보 없음 - # 107 : scrap_gangwon.py - 108: ScrapBasicArgument( - pf_elt="dl", pf_cls="profile", name_elt="strong", pty_elt="li" - ), - 109: ScrapBasicArgument( - pf_memlistelt="section", - pf_memlistcls="memberName", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - ), - 110: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - # 111 : TODO! 
정당 없고 홈페이지는 깨짐 - 112: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em" - ), - 113: ScrapBasicArgument( - pf_elt="div", pf_cls="profile", name_cls="name", pty_elt="li" - ), - 115: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="li", - ), - # TODO : 정당이 주석처리되어 있어서 soup가 인식을 못함. - 116: ScrapBasicArgument( - pf_elt="div", pf_cls="memberName", name_cls="name", pty_elt="dd" - ), - 127: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 128: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 129: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="memberList", - pf_elt="div", - pf_cls="right", - name_elt="h5", - pty_elt="span", - ), - 130: ScrapBasicArgument( - pf_elt="div", - pf_cls="parliament_text", - name_elt="h3", - name_cls="h0", - pty_elt="span", - ), - 131: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 133: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="div", - name_cls="name", - pty_elt="span", - ), - 135: ScrapBasicArgument( - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - 136: ScrapBasicArgument( - pf_elt="div", - pf_cls="info", - name_elt="div", - name_cls="tit", - pty_elt="li", - ), - 137: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 138: ScrapBasicArgument( - pf_elt="div", - pf_cls="item", - name_elt="h5", - name_cls="name", - pty_elt="span", - ), - # TODO: 139 크롤링 실패 - # HTTPSConnectionPool(host='www.scc.go.kr', port=443): Max retries exceeded with url: /index.php?MenuID=48 (Caused by SSLError(SSLError(1, '[SSL: DH_KEY_TOO_SMALL] dh key too small (_ssl.c:1007)'))) - 139: ScrapBasicArgument( - pf_elt="ul", - pf_cls="assembly_list", - name_elt="div", - name_cls="names", - pty_elt="li", - ), - 141: ScrapBasicArgument( - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - 143: ScrapBasicArgument( - pf_elt="div", - pf_cls="card--body", - name_elt="strong", - name_cls="ui-list__title", - pty_elt="li", - ), - # TODO: 144 크롤링 실패 - # HTTPSConnectionPool(host='council.dangjin.go.kr', port=443): Max retries exceeded with url: /content/member/member.html (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1007)'))) - 144: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="mlist", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - # TODO: 145 크롤링 실패 - # HTTPSConnectionPool(host='council.buyeo.go.kr', port=443): Max retries exceeded with url: /kr/member/active2.do (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1007)'))) - 145: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - # TODO: 146 크롤링 실패 - # HTTPSConnectionPool(host='www.scouncil.go.kr', port=443): Max retries exceeded with url: /kr/prog/asemby/sub02_01/list.do (Caused by SSLError(SSLError(1, '[SSL: WRONG_SIGNATURE_TYPE] wrong signature type (_ssl.c:1007)'))) - 146: ScrapBasicArgument( - pf_elt="div", - pf_cls="pt", - name_elt="strong", - name_cls="name", - pty_elt="li", - ), - 147: ScrapBasicArgument( 
- pf_elt="dl", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="span", - ), - 148: ScrapBasicArgument( - pf_elt="div", - pf_cls="col", - name_elt="strong", - name_cls="ui-list__title", - pty_elt="li", - ), - 149: ScrapBasicArgument( - pf_elt="dl", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="span", - ), - 150: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="strong", - name_cls="name", - pty_elt="span", - ), - 151: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 152: ScrapBasicArgument( - pf_elt="div", - pf_cls="councillor_info", - name_elt="span", - name_cls="name", - pty_elt="li", - ), - 153: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="em", - name_cls="name", - pty_elt="span", - ), - 158: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="mlist", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - 159: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="mlist", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - 166: ScrapBasicArgument( - pf_elt="div", - pf_cls="profile", - name_elt="strong", - pty_elt="span", - ), - 168: ScrapBasicArgument( - pf_memlistelt="ul", - pf_memlistcls="mlist", - pf_elt="dl", - name_elt="dd", - name_cls="name", - pty_elt="span", - pty_cls="itemContent", - ), - } - + f = open(JSON_PATH, 'r') + args = json.load(f) + f.close() + # 데이터 가져오기 data: list[dict] = worksheet.get_all_records() result: str = "" @@ -619,7 +85,7 @@ def main() -> None: parse_error_times = 0 timeouts = 0 N = 226 - for n in range(57, 113): + for n in range(1, 57): if n in no_information: print( f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다." "다시 확인해보시겠어요? 링크 : ", @@ -631,17 +97,20 @@ def main() -> None: result = None try: council_url = data[n - 1]["URL"] - council_args = args[n] if n in args.keys() else None + council_args = args.get(str(n), None) + if council_args is not None: + council_args = ScrapBasicArgument(**council_args) + # council_args = args[n] if n in args.keys() else None if n in special_functions: function_name = f"scrap_{n}" if hasattr(sys.modules[__name__], function_name): function_to_call = getattr(sys.modules[__name__], function_name) if n < 57 or n in [62, 63, 64, 107]: - result = str(function_to_call(council_url).councilors) + result = str(function_to_call(council_url, n).councilors) else: result = str( - function_to_call(council_url, args=council_args).councilors + function_to_call(council_url, n, args=council_args).councilors ) else: result = str( diff --git a/scrap/utils/types.py b/scrap/utils/types.py index 9cbeb5e..13c9a42 100644 --- a/scrap/utils/types.py +++ b/scrap/utils/types.py @@ -44,7 +44,7 @@ class ScrapResult: 의회 크롤링 결과를 나타내는 타입입니다. """ - council_id: str + council_id: int """ 의회를 구분하기 위한 문자열입니다. 
""" From b31c676b9d5339f6b8081be5829a7520eb1609e6 Mon Sep 17 00:00:00 2001 From: Re-st Date: Wed, 8 Nov 2023 12:15:19 +0000 Subject: [PATCH 09/12] Formatted with black --- scrap/local_councils/__init__.py | 3 +- scrap/local_councils/basic.py | 2 + scrap/local_councils/busan.py | 51 ++++++----------- scrap/local_councils/chungcheong.py | 28 ++++++---- scrap/local_councils/daegu.py | 40 +++++--------- scrap/local_councils/daejeon.py | 19 +++---- scrap/local_councils/gangwon.py | 73 ++++++++++++------------- scrap/local_councils/gwangju.py | 22 +++----- scrap/local_councils/gyeonggi.py | 10 +++- scrap/local_councils/incheon.py | 33 ++++------- scrap/local_councils/jeolla.py | 67 ++++++++++++----------- scrap/local_councils/seoul.py | 85 +++++++++++------------------ scrap/local_councils/ulsan.py | 13 ++--- scrap/utils/spreadsheet.py | 9 ++- 14 files changed, 197 insertions(+), 258 deletions(-) diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py index 9c6b97e..cd5cf85 100644 --- a/scrap/local_councils/__init__.py +++ b/scrap/local_councils/__init__.py @@ -10,9 +10,10 @@ from scrap.utils.types import CouncilType from scrap.utils.utils import getPartyList + def returncouncilors(cid, councilors): return ScrapResult( council_id=cid, council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors, - ) \ No newline at end of file + ) diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py index 7354217..5ce26e2 100644 --- a/scrap/local_councils/basic.py +++ b/scrap/local_councils/basic.py @@ -7,6 +7,7 @@ party_keywords = getPartyList() party_keywords.append("무소속") + def find(soup, element, class_): if class_ is None: return soup.find(element) @@ -141,6 +142,7 @@ def get_party_easy(profile, wrapper_element, wrapper_class_, wrapper_txt, url): assert party is not None return party + def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapResult: """의원 상세약력 스크랩 :param url: 의원 목록 사이트 url diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 42b6159..f32ab81 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -4,9 +4,8 @@ from scrap.local_councils import * from scrap.local_councils.basic import returncouncilors -def scrap_26( - url, cid -) -> ScrapResult: + +def scrap_26(url, cid) -> ScrapResult: """부산 중구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -28,10 +27,8 @@ def scrap_26( return returncouncilors(cid, councilors) -def scrap_27( - url, - cid -) -> ScrapResult: + +def scrap_27(url, cid) -> ScrapResult: """부산 서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -60,9 +57,7 @@ def scrap_27( returncouncilors(cid, councilors) -def scrap_28( - url, cid -) -> ScrapResult: +def scrap_28(url, cid) -> ScrapResult: """부산 동구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -102,9 +97,7 @@ def scrap_29(url, cid) -> ScrapResult: return returncouncilors(cid, councilors) -def scrap_30( - url, cid -) -> ScrapResult: +def scrap_30(url, cid) -> ScrapResult: """부산 부산진구""" soup = get_soup(url, verify=False).find("ul", class_="mlist") councilors: list[Councilor] = [] @@ -125,9 +118,7 @@ def scrap_30( return returncouncilors(cid, councilors) -def scrap_31( - url, cid -) -> ScrapResult: +def scrap_31(url, cid) -> ScrapResult: """부산 동래구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -169,8 +160,7 @@ def scrap_32(url, cid) -> ScrapResult: return returncouncilors(cid, councilors) 
-def scrap_33( -url, cid) -> ScrapResult: +def scrap_33(url, cid) -> ScrapResult: """부산 북구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -189,8 +179,7 @@ def scrap_33( return returncouncilors(cid, councilors) -def scrap_34( -url, cid) -> ScrapResult: +def scrap_34(url, cid) -> ScrapResult: """부산 해운대구""" soup = get_soup(url, verify=False).find("div", class_="initial_list") councilors: list[Councilor] = [] @@ -217,8 +206,7 @@ def scrap_34( return returncouncilors(cid, councilors) -def scrap_35( -url, cid) -> ScrapResult: +def scrap_35(url, cid) -> ScrapResult: """부산 기장군""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -239,8 +227,7 @@ def scrap_35( return returncouncilors(cid, councilors) -def scrap_36( -url, cid) -> ScrapResult: +def scrap_36(url, cid) -> ScrapResult: """부산 사하구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -259,8 +246,7 @@ def scrap_36( return returncouncilors(cid, councilors) -def scrap_37( -url, cid) -> ScrapResult: +def scrap_37(url, cid) -> ScrapResult: """부산 금정구""" soup = get_soup(url, verify=False).find("div", class_="council_list") councilors: list[Councilor] = [] @@ -281,8 +267,7 @@ def scrap_37( return returncouncilors(cid, councilors) -def scrap_38( -url, cid) -> ScrapResult: +def scrap_38(url, cid) -> ScrapResult: """부산 강서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -305,8 +290,7 @@ def scrap_38( return returncouncilors(cid, councilors) -def scrap_39( -url, cid) -> ScrapResult: +def scrap_39(url, cid) -> ScrapResult: """부산 연제구""" councilors: list[Councilor] = [] @@ -338,8 +322,7 @@ def scrap_39( return returncouncilors(cid, councilors) -def scrap_40( -url, cid) -> ScrapResult: +def scrap_40(url, cid) -> ScrapResult: """부산 수영구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -358,9 +341,7 @@ def scrap_40( return returncouncilors(cid, councilors) -def scrap_41( - url, cid -) -> ScrapResult: +def scrap_41(url, cid) -> ScrapResult: """부산 사상구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] diff --git a/scrap/local_councils/chungcheong.py b/scrap/local_councils/chungcheong.py index caf8323..962f845 100644 --- a/scrap/local_councils/chungcheong.py +++ b/scrap/local_councils/chungcheong.py @@ -1,8 +1,10 @@ from scrap.local_councils import * from scrap.local_councils.basic import * + def scrap_124( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 청주시 페이지에서 의원 상세약력 스크랩 @@ -32,7 +34,9 @@ def scrap_124( def scrap_125( -url, cid, args: ScrapBasicArgument = None, + url, + cid, + args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 충주시 페이지에서 의원 상세약력 스크랩 @@ -60,7 +64,9 @@ def scrap_125( def scrap_126( -url, cid, args: ScrapBasicArgument = None, + url, + cid, + args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 제천시 페이지에서 의원 상세약력 스크랩 @@ -87,7 +93,9 @@ def scrap_126( def scrap_132( -url, cid, args: ScrapBasicArgument = None, + url, + cid, + args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 제천시 페이지에서 의원 상세약력 스크랩 @@ -115,7 +123,9 @@ def scrap_132( def scrap_134( -url, cid, args: ScrapBasicArgument = None, + url, + cid, + args: ScrapBasicArgument = None, ) -> ScrapResult: """충청북도 증평군 페이지에서 의원 상세약력 스크랩 @@ -143,9 +153,7 @@ def scrap_134( return returncouncilors(cid, councilors) -def scrap_140( -url, cid, args: ScrapBasicArgument = None -) -> ScrapResult: +def scrap_140(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: """충청남도 태안군 페이지에서 의원 상세약력 
스크랩 :param url: 의원 목록 사이트 url @@ -168,9 +176,7 @@ def scrap_140( return returncouncilors(cid, councilors) -def scrap_142( -url, cid, args -) -> ScrapResult: +def scrap_142(url, cid, args) -> ScrapResult: """충청남도 논산시""" base_url = "https://www.nonsancl.go.kr/kr/member/profile_popup?uid=" soup = get_soup(url) diff --git a/scrap/local_councils/daegu.py b/scrap/local_councils/daegu.py index 4ed28cb..c0b96df 100644 --- a/scrap/local_councils/daegu.py +++ b/scrap/local_councils/daegu.py @@ -1,7 +1,7 @@ from scrap.local_councils import * -def scrap_42( -url, cid) -> ScrapResult: + +def scrap_42(url, cid) -> ScrapResult: """대구 중구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -22,10 +22,8 @@ def scrap_42( return returncouncilors(cid, councilors) -def scrap_43( -url, cid) -> ScrapResult: - """대구 동구 - """ +def scrap_43(url, cid) -> ScrapResult: + """대구 동구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -56,8 +54,7 @@ def scrap_43( def scrap_44(url, cid) -> ScrapResult: - """대구 서구 - """ + """대구 서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -79,10 +76,8 @@ def scrap_44(url, cid) -> ScrapResult: return returncouncilors(cid, councilors) -def scrap_45( -url, cid) -> ScrapResult: - """대구 남구 - """ +def scrap_45(url, cid) -> ScrapResult: + """대구 남구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -103,8 +98,7 @@ def scrap_45( def scrap_46(url, cid) -> ScrapResult: - """대구 북구 - """ + """대구 북구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -124,10 +118,8 @@ def scrap_46(url, cid) -> ScrapResult: return returncouncilors(cid, councilors) -def scrap_47( -url, cid) -> ScrapResult: - """대구 수성구 - """ +def scrap_47(url, cid) -> ScrapResult: + """대구 수성구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -145,10 +137,8 @@ def scrap_47( return returncouncilors(cid, councilors) -def scrap_48( -url, cid) -> ScrapResult: - """대구 달서구 - """ +def scrap_48(url, cid) -> ScrapResult: + """대구 달서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -169,10 +159,8 @@ def scrap_48( return returncouncilors(cid, councilors) -def scrap_49( -url, cid) -> ScrapResult: - """대구 달성군 - """ +def scrap_49(url, cid) -> ScrapResult: + """대구 달성군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] diff --git a/scrap/local_councils/daejeon.py b/scrap/local_councils/daejeon.py index b0ae78b..992a026 100644 --- a/scrap/local_councils/daejeon.py +++ b/scrap/local_councils/daejeon.py @@ -1,8 +1,8 @@ from scrap.local_councils import * + def scrap_65(url, cid) -> ScrapResult: - """대전 동구 - """ + """대전 동구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -35,8 +35,7 @@ def scrap_65(url, cid) -> ScrapResult: def scrap_66(url, cid) -> ScrapResult: - """대전 중구 - """ + """대전 중구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -54,10 +53,10 @@ def scrap_66(url, cid) -> ScrapResult: def scrap_67( - url, cid, + url, + cid, ) -> ScrapResult: - """대전 서구 - """ + """대전 서구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -78,8 +77,7 @@ def scrap_67( def scrap_68(url, cid) -> ScrapResult: - """대전 유성구 - """ + """대전 유성구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -99,8 +97,7 @@ def scrap_68(url, cid) -> ScrapResult: def scrap_69(url, cid) -> ScrapResult: - """대전 대덕구 - """ + """대전 대덕구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] 
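# Sketch, not the repository's actual code: several hunks in this series
# (busan scrap_39, incheon scrap_52) replace inline chromedriver boilerplate
# with the get_selenium() helper exported from scrap.utils.requests, while
# gangwon's scrap_107 below still carries the boilerplate inline. One plausible
# implementation consistent with the call sites, assuming Selenium 4.6+ where
# webdriver.Chrome resolves its own driver:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # re-exported so call sites can import By from here


def get_selenium(url: str) -> webdriver.Chrome:
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    browser = webdriver.Chrome(options=options)  # replaces the `which chromedriver` check
    browser.get(url)
    return browser


# Call-site shape (selector is illustrative only):
#     browser = get_selenium(url)
#     for info in browser.find_elements(By.CSS_SELECTOR, "..."):
#         name = info.find_element(By.TAG_NAME, "span").text.strip()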
diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index ed2b29f..365413e 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -14,10 +14,10 @@ def scrap_107( - url, cid, + url, + cid, ) -> ScrapResult: - """강원도 원주시 - """ + """강원도 원주시""" councilors: list[Councilor] = [] driver_loc = os.popen("which chromedriver").read().strip() @@ -55,11 +55,8 @@ def scrap_107( return returncouncilors(cid, councilors) -def scrap_113( - url, cid, args: ScrapBasicArgument = None -) -> ScrapResult: - """강원도 속초시 - """ +def scrap_113(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """강원도 속초시""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -80,11 +77,11 @@ def scrap_113( def scrap_114( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 고성군 - """ + """강원도 고성군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -107,11 +104,11 @@ def scrap_114( def scrap_115( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 양양군 - """ + """강원도 양양군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -132,11 +129,11 @@ def scrap_115( def scrap_116( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 인제군 - """ + """강원도 인제군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -153,11 +150,11 @@ def scrap_116( def scrap_117( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 홍천군 - """ + """강원도 홍천군""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -179,11 +176,11 @@ def scrap_117( def scrap_118( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 횡성군 - """ + """강원도 횡성군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -200,11 +197,11 @@ def scrap_118( def scrap_119( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 영월군 - """ + """강원도 영월군""" base_url = "https://council.yw.go.kr" soup = get_soup(url) councilors: list[Councilor] = [] @@ -233,11 +230,11 @@ def scrap_119( def scrap_120( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 평창군 - """ + """강원도 평창군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -256,11 +253,11 @@ def scrap_120( def scrap_121( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 화천군 - """ + """강원도 화천군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -277,11 +274,11 @@ def scrap_121( def scrap_122( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 양구군 - """ + """강원도 양구군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -298,11 +295,11 @@ def scrap_122( def scrap_123( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """강원도 철원군 - """ + """강원도 철원군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index aa3c15b..efc0875 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -6,15 +6,13 @@ from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from scrap.local_councils import * + party_keywords = getPartyList() party_keywords.append("무소속") -def scrap_62( - url, cid -) -> ScrapResult: - """광주 서구 - """ +def 
scrap_62(url, cid) -> ScrapResult: + """광주 서구""" councilors: list[Councilor] = [] driver_loc = os.popen("which chromedriver").read().strip() @@ -57,11 +55,8 @@ def scrap_62( return returncouncilors(cid, councilors) -def scrap_63( - url, cid -) -> ScrapResult: - """광주 북구 - """ +def scrap_63(url, cid) -> ScrapResult: + """광주 북구""" councilors: list[Councilor] = [] driver_loc = os.popen("which chromedriver").read().strip() @@ -96,11 +91,8 @@ def scrap_63( return returncouncilors(cid, councilors) -def scrap_64( - url, cid -) -> ScrapResult: - """광주 광산구 - """ +def scrap_64(url, cid) -> ScrapResult: + """광주 광산구""" councilors: list[Councilor] = [] driver_loc = os.popen("which chromedriver").read().strip() diff --git a/scrap/local_councils/gyeonggi.py b/scrap/local_councils/gyeonggi.py index 4641ea4..21b8cdb 100644 --- a/scrap/local_councils/gyeonggi.py +++ b/scrap/local_councils/gyeonggi.py @@ -1,7 +1,15 @@ """경기도를 스크랩. """ from scrap.local_councils import * -from scrap.local_councils.basic import find, regex_pattern, find_all, extract_party, get_name, get_party_easy +from scrap.local_councils.basic import ( + find, + regex_pattern, + find_all, + extract_party, + get_name, + get_party_easy, +) + def get_profiles_88_103(soup, element, class_, memberlistelement, memberlistclass_): if memberlistelement is not None: diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index c5b49a8..e2eb65e 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -10,9 +10,9 @@ ) from scrap.local_councils import * + def scrap_50(url, cid) -> ScrapResult: - """인천 중구 - """ + """인천 중구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -27,8 +27,7 @@ def scrap_50(url, cid) -> ScrapResult: def scrap_51(url, cid) -> ScrapResult: - """인천 동구 - """ + """인천 동구""" raise Exception("현재 인천 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") # soup = get_soup(url, verify=False) @@ -57,11 +56,8 @@ def scrap_51(url, cid) -> ScrapResult: # return returncouncilors(cid, councilors) -def scrap_52( - url, cid -) -> ScrapResult: - """인천 미추홀구 - """ +def scrap_52(url, cid) -> ScrapResult: + """인천 미추홀구""" councilors: list[Councilor] = [] browser = get_selenium(url) @@ -85,8 +81,7 @@ def scrap_52( def scrap_53(url, cid) -> ScrapResult: - """인천 연수구 - """ + """인천 연수구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -107,8 +102,7 @@ def scrap_53(url, cid) -> ScrapResult: def scrap_54(url, cid) -> ScrapResult: - """인천 남동구 - """ + """인천 남동구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -127,8 +121,7 @@ def scrap_54(url, cid) -> ScrapResult: def scrap_55(url, cid) -> ScrapResult: - """인천 부평구 - """ + """인천 부평구""" raise Exception("현재 인천 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") # soup = get_soup(url, verify=False) @@ -152,11 +145,8 @@ def scrap_55(url, cid) -> ScrapResult: # ) -def scrap_56( - url, cid -) -> ScrapResult: - """인천 계양구 - """ +def scrap_56(url, cid) -> ScrapResult: + """인천 계양구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -176,8 +166,7 @@ def scrap_56( def scrap_57(url, args) -> ScrapResult: - """인천 서구 - """ + """인천 서구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] cid = 57 diff --git a/scrap/local_councils/jeolla.py b/scrap/local_councils/jeolla.py index 86ef7eb..0c73bc7 100644 --- a/scrap/local_councils/jeolla.py +++ b/scrap/local_councils/jeolla.py @@ -1,11 +1,12 @@ from scrap.local_councils import * + def scrap_154( - url, cid, + url, + cid, 
args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 남원시 - """ + """전라북도 남원시""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -22,11 +23,11 @@ def scrap_154( def scrap_155( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 김제시 - """ + """전라북도 김제시""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -43,11 +44,11 @@ def scrap_155( def scrap_156( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 완주군 - """ + """전라북도 완주군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] memberlist = soup.find("div", class_="card-member") @@ -65,11 +66,11 @@ def scrap_156( def scrap_157( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 진안군 - """ + """전라북도 진안군""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -86,31 +87,31 @@ def scrap_157( def scrap_160( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 임실군 - """ + """전라북도 임실군""" # TODO: js로 동적으로 읽어옴 raise NotImplementedError def scrap_161( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 순창군 - """ + """전라북도 순창군""" # TODO: js로 동적으로 읽어옴 raise NotImplementedError def scrap_162( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 고창군 - """ + """전라북도 고창군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -127,11 +128,11 @@ def scrap_162( def scrap_163( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 부안군 - """ + """전라북도 부안군""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -154,11 +155,11 @@ def scrap_163( def scrap_164( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라남도 목포시 - """ + """전라남도 목포시""" base_url = "https://council.mokpo.go.kr/" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -182,11 +183,11 @@ def scrap_164( def scrap_165( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라남도 여수시 - """ + """전라남도 여수시""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -204,11 +205,11 @@ def scrap_165( def scrap_167( - url, cid, + url, + cid, args: ScrapBasicArgument = None, ) -> ScrapResult: - """전라북도 나주시 - """ + """전라북도 나주시""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] diff --git a/scrap/local_councils/seoul.py b/scrap/local_councils/seoul.py index 33fc2e4..c64a265 100644 --- a/scrap/local_councils/seoul.py +++ b/scrap/local_councils/seoul.py @@ -7,11 +7,12 @@ from scrap.utils.requests import get_soup from scrap.local_councils import * + def scrap_1( - url, cid, + url, + cid, ) -> ScrapResult: - """서울 종로구 - """ + """서울 종로구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -30,8 +31,7 @@ def scrap_1( def scrap_2(url, cid) -> ScrapResult: - """서울 중구 - """ + """서울 중구""" parliment_soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -60,8 +60,7 @@ def scrap_2(url, cid) -> ScrapResult: def scrap_3(url, cid) -> ScrapResult: - """서울 용산구 - """ + """서울 용산구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -81,8 +80,7 @@ def scrap_3(url, cid) -> ScrapResult: def scrap_4(url, cid) -> ScrapResult: - """서울 성동구 - """ + """서울 성동구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -101,8 
+99,7 @@ def scrap_4(url, cid) -> ScrapResult: def scrap_5(url, cid) -> ScrapResult: - """서울 광진구 - """ + """서울 광진구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -123,8 +120,7 @@ def scrap_5(url, cid) -> ScrapResult: def scrap_6(url, cid) -> ScrapResult: - """서울 동대문구 - """ + """서울 동대문구""" parliment_soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -158,8 +154,7 @@ def scrap_6(url, cid) -> ScrapResult: def scrap_7(url, cid) -> ScrapResult: - """서울 중랑구 - """ + """서울 중랑구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -178,8 +173,7 @@ def scrap_7(url, cid) -> ScrapResult: def scrap_8(url, cid) -> ScrapResult: - """서울 성북구 - """ + """서울 성북구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -200,8 +194,7 @@ def scrap_8(url, cid) -> ScrapResult: def scrap_9(url, cid) -> ScrapResult: - """서울 강북구 - """ + """서울 강북구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -224,10 +217,10 @@ def scrap_9(url, cid) -> ScrapResult: def scrap_10( - url, cid, + url, + cid, ) -> ScrapResult: - """서울 도봉구 - """ + """서울 도봉구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -248,8 +241,7 @@ def scrap_10( def scrap_11(url, cid) -> ScrapResult: - """서울 노원구 - """ + """서울 노원구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -270,8 +262,7 @@ def scrap_11(url, cid) -> ScrapResult: def scrap_12(url, cid) -> ScrapResult: - """서울 은평구 - """ + """서울 은평구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -294,10 +285,10 @@ def scrap_12(url, cid) -> ScrapResult: def scrap_13( - url, cid, + url, + cid, ) -> ScrapResult: - """서울 서대문구 - """ + """서울 서대문구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -323,8 +314,7 @@ def scrap_13( def scrap_14(url, cid) -> ScrapResult: - """서울 마포구 - """ + """서울 마포구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -343,8 +333,7 @@ def scrap_14(url, cid) -> ScrapResult: def scrap_15(url, cid) -> ScrapResult: - """서울 양천구 - """ + """서울 양천구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -377,8 +366,7 @@ def scrap_15(url, cid) -> ScrapResult: def scrap_16(url, cid) -> ScrapResult: - """서울 강서구 - """ + """서울 강서구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -403,8 +391,7 @@ def scrap_16(url, cid) -> ScrapResult: def scrap_17(url, cid) -> ScrapResult: - """서울 구로구 - """ + """서울 구로구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -427,8 +414,7 @@ def scrap_17(url, cid) -> ScrapResult: def scrap_18(url, cid) -> ScrapResult: - """서울 금천구 - """ + """서울 금천구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: list[Councilor] = [] @@ -449,8 +435,7 @@ def scrap_18(url, cid) -> ScrapResult: def scrap_19(url, cid) -> ScrapResult: - """서울 영등포구 - """ + """서울 영등포구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -469,8 +454,7 @@ def scrap_19(url, cid) -> ScrapResult: def scrap_20(url, cid) -> ScrapResult: - """서울 동작구 - """ + """서울 동작구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -489,8 +473,7 @@ def scrap_20(url, cid) -> ScrapResult: def scrap_21(url, cid) -> ScrapResult: - """서울 관악구 - """ + """서울 관악구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -511,8 +494,7 @@ def scrap_21(url, cid) -> ScrapResult: def scrap_22(url, cid) -> ScrapResult: - """서울 서초구 
- """ + """서울 서초구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -533,8 +515,7 @@ def scrap_22(url, cid) -> ScrapResult: def scrap_23(url, cid) -> ScrapResult: - """서울 강남구 - """ + """서울 강남구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -557,8 +538,7 @@ def scrap_23(url, cid) -> ScrapResult: def scrap_24(url, cid) -> ScrapResult: - """서울 송파구 - """ + """서울 송파구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] @@ -577,8 +557,7 @@ def scrap_24(url, cid) -> ScrapResult: def scrap_25(url, cid) -> ScrapResult: - """서울 강동구 - """ + """서울 강동구""" soup = get_soup(url, verify=False) councilors: list[Councilor] = [] diff --git a/scrap/local_councils/ulsan.py b/scrap/local_councils/ulsan.py index a349733..d74692b 100644 --- a/scrap/local_councils/ulsan.py +++ b/scrap/local_councils/ulsan.py @@ -1,9 +1,8 @@ from scrap.local_councils import * from scrap.local_councils.basic import regex_pattern -def scrap_70( - url, cid -) -> ScrapResult: + +def scrap_70(url, cid) -> ScrapResult: """울산 중구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -28,9 +27,7 @@ def scrap_70( return returncouncilors(cid, councilors) -def scrap_71( - url, cid -) -> ScrapResult: +def scrap_71(url, cid) -> ScrapResult: """울산 남구""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -57,9 +54,7 @@ def scrap_71( return returncouncilors(cid, councilors) -def scrap_72( - url, cid -) -> ScrapResult: +def scrap_72(url, cid) -> ScrapResult: """울산 동구""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: List[Councilor] = [] diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 32d8534..1d3621e 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -27,6 +27,7 @@ BASE_DIR = os.path.join(PWD, os.pardir, os.pardir) JSON_PATH = os.path.join(PWD, "scrap_args.json") + def google_authorization(): """Google Sheets API 활용을 위한 인증 정보 요청 credentials.json 파일을 토대로 인증을 요청하되, token.json 파일이 존재할 경우 거기에 저장된 정보 활용 @@ -74,10 +75,10 @@ def main() -> None: ) no_information = [106, 111] errors = [] - f = open(JSON_PATH, 'r') + f = open(JSON_PATH, "r") args = json.load(f) f.close() - + # 데이터 가져오기 data: list[dict] = worksheet.get_all_records() result: str = "" @@ -110,7 +111,9 @@ def main() -> None: result = str(function_to_call(council_url, n).councilors) else: result = str( - function_to_call(council_url, n, args=council_args).councilors + function_to_call( + council_url, n, args=council_args + ).councilors ) else: result = str( From f5d0042fecb4907c24c69b52e47bdd461fdd11c4 Mon Sep 17 00:00:00 2001 From: Re-st Date: Wed, 8 Nov 2023 21:10:30 +0900 Subject: [PATCH 10/12] =?UTF-8?q?[scrap]=20returncouncilors=20=ED=95=A8?= =?UTF-8?q?=EC=88=98=EC=9D=B4=EB=A6=84=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/local_councils/__init__.py | 3 +- scrap/local_councils/basic.py | 2 +- scrap/local_councils/busan.py | 35 +++++++------ scrap/local_councils/chungcheong.py | 78 ++++++++--------------------- scrap/local_councils/daegu.py | 16 +++--- scrap/local_councils/daejeon.py | 10 ++-- scrap/local_councils/gangwon.py | 26 +++++----- scrap/local_councils/gwangju.py | 6 +-- scrap/local_councils/gyeonggi.py | 4 +- scrap/local_councils/incheon.py | 15 +++--- scrap/local_councils/jeolla.py | 18 +++---- scrap/local_councils/seoul.py | 52 +++++++++---------- scrap/local_councils/ulsan.py | 10 ++-- 13 files changed, 115 insertions(+), 160 
deletions(-)

diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py
index cd5cf85..5868716 100644
--- a/scrap/local_councils/__init__.py
+++ b/scrap/local_councils/__init__.py
@@ -10,8 +10,7 @@
 from scrap.utils.types import CouncilType
 from scrap.utils.utils import getPartyList
 
-
-def returncouncilors(cid, councilors):
+def ret_local_councilors(cid, councilors):
     return ScrapResult(
         council_id=cid,
         council_type=CouncilType.LOCAL_COUNCIL,
diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py
index 5ce26e2..dffddf7 100644
--- a/scrap/local_councils/basic.py
+++ b/scrap/local_councils/basic.py
@@ -189,7 +189,7 @@ def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapRe
             raise RuntimeError("[basic.py] 의원 정당을 가져오는데 실패했습니다. 이유: " + str(e))
         councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 if __name__ == "__main__":
diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py
index f32ab81..8240415 100644
--- a/scrap/local_councils/busan.py
+++ b/scrap/local_councils/busan.py
@@ -2,8 +2,7 @@
 from scrap.utils.requests import get_selenium, By
 
 from scrap.local_councils import *
-from scrap.local_councils.basic import returncouncilors
-
+from scrap.local_councils.basic import ret_local_councilors
 def scrap_26(url, cid) -> ScrapResult:
     """부산 중구"""
     soup = get_soup(url, verify=False)
@@ -25,7 +24,7 @@ def scrap_26(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_27(url, cid) -> ScrapResult:
@@ -54,7 +53,7 @@ def scrap_27(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_28(url, cid) -> ScrapResult:
@@ -73,7 +72,7 @@ def scrap_28(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_29(url, cid) -> ScrapResult:
@@ -94,7 +93,7 @@ def scrap_29(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_30(url, cid) -> ScrapResult:
@@ -115,7 +114,7 @@ def scrap_30(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_31(url, cid) -> ScrapResult:
@@ -133,7 +132,7 @@ def scrap_31(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_32(url, cid) -> ScrapResult:
@@ -157,7 +156,7 @@ def scrap_32(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_33(url, cid) -> ScrapResult:
@@ -176,7 +175,7 @@ def scrap_33(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_34(url, cid) -> ScrapResult:
@@ -203,7 +202,7 @@ def scrap_34(url, cid) -> ScrapResult:
 
     councilors.append(Councilor(name=name, party=party))
 
-    return returncouncilors(cid, councilors)
+    return ret_local_councilors(cid, councilors)
 
 
 def scrap_35(url, cid) -> 
ScrapResult: @@ -224,7 +223,7 @@ def scrap_35(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_36(url, cid) -> ScrapResult: @@ -243,7 +242,7 @@ def scrap_36(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_37(url, cid) -> ScrapResult: @@ -264,7 +263,7 @@ def scrap_37(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_38(url, cid) -> ScrapResult: @@ -287,7 +286,7 @@ def scrap_38(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_39(url, cid) -> ScrapResult: @@ -319,7 +318,7 @@ def scrap_39(url, cid) -> ScrapResult: councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_40(url, cid) -> ScrapResult: @@ -338,7 +337,7 @@ def scrap_40(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_41(url, cid) -> ScrapResult: @@ -362,7 +361,7 @@ def scrap_41(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/chungcheong.py b/scrap/local_councils/chungcheong.py index 962f845..3557d3b 100644 --- a/scrap/local_councils/chungcheong.py +++ b/scrap/local_councils/chungcheong.py @@ -1,16 +1,8 @@ from scrap.local_councils import * from scrap.local_councils.basic import * - -def scrap_124( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: - """충청북도 청주시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_124(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청북도 청주시 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -30,18 +22,11 @@ def scrap_124( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) -def scrap_125( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: - """충청북도 충주시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_125(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청북도 충주시 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -60,18 +45,11 @@ def scrap_125( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) - + return ret_local_councilors(cid, councilors) -def scrap_126( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: - """충청북도 제천시 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_126(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청북도 제천시 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -89,18 +67,11 @@ def scrap_126( name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" councilors.append(Councilor(name=name, party=party)) - return 
returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) -def scrap_132( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: - """충청북도 제천시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_132(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청북도 제천시 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -119,18 +90,11 @@ def scrap_132( name = name_tag.get_text(strip=True).split()[0] # 김철수 의원 -> 김철수 councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) - + return ret_local_councilors(cid, councilors) -def scrap_134( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: - """충청북도 증평군 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_134(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청북도 증평군 """ soup = get_soup(url, verify=False, encoding="euc-kr") councilors: List[Councilor] = [] @@ -150,14 +114,12 @@ def scrap_134( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) - + return ret_local_councilors(cid, councilors) -def scrap_140(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청남도 태안군 페이지에서 의원 상세약력 스크랩 - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 +def scrap_140(url, cid, args: ScrapBasicArgument = None +) -> ScrapResult: + """충청남도 태안군 """ soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -173,7 +135,7 @@ def scrap_140(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_142(url, cid, args) -> ScrapResult: @@ -198,7 +160,7 @@ def scrap_142(url, cid, args) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/daegu.py b/scrap/local_councils/daegu.py index c0b96df..9884395 100644 --- a/scrap/local_councils/daegu.py +++ b/scrap/local_councils/daegu.py @@ -19,7 +19,7 @@ def scrap_42(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_43(url, cid) -> ScrapResult: @@ -50,7 +50,7 @@ def scrap_43(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_44(url, cid) -> ScrapResult: @@ -73,7 +73,7 @@ def scrap_44(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_45(url, cid) -> ScrapResult: @@ -94,7 +94,7 @@ def scrap_45(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_46(url, cid) -> ScrapResult: @@ -115,7 +115,7 @@ def scrap_46(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_47(url, cid) -> ScrapResult: @@ -134,7 +134,7 @@ def scrap_47(url, cid) -> ScrapResult: 
councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_48(url, cid) -> ScrapResult: @@ -156,7 +156,7 @@ def scrap_48(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_49(url, cid) -> ScrapResult: @@ -191,7 +191,7 @@ def scrap_49(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/daejeon.py b/scrap/local_councils/daejeon.py index 992a026..069f4ed 100644 --- a/scrap/local_councils/daejeon.py +++ b/scrap/local_councils/daejeon.py @@ -31,7 +31,7 @@ def scrap_65(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_66(url, cid) -> ScrapResult: @@ -49,7 +49,7 @@ def scrap_66(url, cid) -> ScrapResult: party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_67( @@ -73,7 +73,7 @@ def scrap_67( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_68(url, cid) -> ScrapResult: @@ -93,7 +93,7 @@ def scrap_68(url, cid) -> ScrapResult: party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_69(url, cid) -> ScrapResult: @@ -112,7 +112,7 @@ def scrap_69(url, cid) -> ScrapResult: party = party_info.find_next("span").get_text(strip=True) councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 365413e..fd45130 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -4,8 +4,6 @@ from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options -from scrap.utils.types import Councilor, ScrapResult, ScrapBasicArgument -from scrap.utils.requests import get_soup from scrap.local_councils.basic import * from scrap.utils.utils import getPartyList @@ -52,7 +50,7 @@ def scrap_107( councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_113(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: @@ -73,7 +71,7 @@ def scrap_113(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_114( @@ -100,7 +98,7 @@ def scrap_114( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_115( @@ -125,7 +123,7 @@ def scrap_115( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_116( @@ -146,7 +144,7 @@ def scrap_116( 
councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_117( @@ -172,7 +170,7 @@ def scrap_117( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_118( @@ -193,7 +191,7 @@ def scrap_118( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_119( @@ -226,7 +224,7 @@ def scrap_119( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_120( @@ -249,7 +247,7 @@ def scrap_120( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_121( @@ -270,7 +268,7 @@ def scrap_121( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_122( @@ -291,7 +289,7 @@ def scrap_122( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_123( @@ -315,7 +313,7 @@ def scrap_123( # TODO councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index efc0875..02f8cc0 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -52,7 +52,7 @@ def scrap_62(url, cid) -> ScrapResult: councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_63(url, cid) -> ScrapResult: @@ -88,7 +88,7 @@ def scrap_63(url, cid) -> ScrapResult: councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_64(url, cid) -> ScrapResult: @@ -127,4 +127,4 @@ def scrap_64(url, cid) -> ScrapResult: councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) diff --git a/scrap/local_councils/gyeonggi.py b/scrap/local_councils/gyeonggi.py index 21b8cdb..d6783cb 100644 --- a/scrap/local_councils/gyeonggi.py +++ b/scrap/local_councils/gyeonggi.py @@ -76,7 +76,7 @@ def scrap_88(url, cid, args: ScrapBasicArgument) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def get_party_103(profile, element, class_, wrapper_element, wrapper_class_, url): @@ -124,4 +124,4 @@ def scrap_103(url, cid, args: ScrapBasicArgument) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index e2eb65e..da3601d 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -1,7 +1,6 @@ """인천광역시를 스크랩. 50-57번째 의회까지 있음. 
""" -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup, get_selenium, By +from scrap.utils.requests import get_selenium, By from scrap.local_councils.basic import ( get_profiles, get_name, @@ -23,7 +22,7 @@ def scrap_50(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_51(url, cid) -> ScrapResult: @@ -77,7 +76,7 @@ def scrap_52(url, cid) -> ScrapResult: councilors.append(Councilor(name, party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_53(url, cid) -> ScrapResult: @@ -98,7 +97,7 @@ def scrap_53(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_54(url, cid) -> ScrapResult: @@ -117,7 +116,7 @@ def scrap_54(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_55(url, cid) -> ScrapResult: @@ -162,7 +161,7 @@ def scrap_56(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_57(url, args) -> ScrapResult: @@ -198,7 +197,7 @@ def scrap_57(url, args) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/jeolla.py b/scrap/local_councils/jeolla.py index 0c73bc7..a3f9f06 100644 --- a/scrap/local_councils/jeolla.py +++ b/scrap/local_councils/jeolla.py @@ -19,7 +19,7 @@ def scrap_154( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_155( @@ -40,7 +40,7 @@ def scrap_155( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_156( @@ -62,7 +62,7 @@ def scrap_156( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_157( @@ -83,7 +83,7 @@ def scrap_157( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_160( @@ -124,7 +124,7 @@ def scrap_162( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_163( @@ -151,7 +151,7 @@ def scrap_163( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_164( @@ -179,7 +179,7 @@ def scrap_164( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_165( @@ -201,7 +201,7 @@ def scrap_165( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_167( @@ -222,7 +222,7 @@ def scrap_167( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, 
councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/seoul.py b/scrap/local_councils/seoul.py index c64a265..dbe5aec 100644 --- a/scrap/local_councils/seoul.py +++ b/scrap/local_councils/seoul.py @@ -3,8 +3,6 @@ """ from urllib.parse import urlparse -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup from scrap.local_councils import * @@ -27,7 +25,7 @@ def scrap_1( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_2(url, cid) -> ScrapResult: @@ -56,7 +54,7 @@ def scrap_2(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_3(url, cid) -> ScrapResult: @@ -76,7 +74,7 @@ def scrap_3(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_4(url, cid) -> ScrapResult: @@ -95,7 +93,7 @@ def scrap_4(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_5(url, cid) -> ScrapResult: @@ -116,7 +114,7 @@ def scrap_5(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_6(url, cid) -> ScrapResult: @@ -150,7 +148,7 @@ def scrap_6(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_7(url, cid) -> ScrapResult: @@ -169,7 +167,7 @@ def scrap_7(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_8(url, cid) -> ScrapResult: @@ -190,7 +188,7 @@ def scrap_8(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_9(url, cid) -> ScrapResult: @@ -213,7 +211,7 @@ def scrap_9(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_10( @@ -237,7 +235,7 @@ def scrap_10( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_11(url, cid) -> ScrapResult: @@ -258,7 +256,7 @@ def scrap_11(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_12(url, cid) -> ScrapResult: @@ -281,7 +279,7 @@ def scrap_12(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_13( @@ -310,7 +308,7 @@ def scrap_13( councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_14(url, cid) -> ScrapResult: @@ -329,7 +327,7 @@ def scrap_14(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + 
return ret_local_councilors(cid, councilors) def scrap_15(url, cid) -> ScrapResult: @@ -362,7 +360,7 @@ def scrap_15(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_16(url, cid) -> ScrapResult: @@ -387,7 +385,7 @@ def scrap_16(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_17(url, cid) -> ScrapResult: @@ -410,7 +408,7 @@ def scrap_17(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_18(url, cid) -> ScrapResult: @@ -431,7 +429,7 @@ def scrap_18(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_19(url, cid) -> ScrapResult: @@ -450,7 +448,7 @@ def scrap_19(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_20(url, cid) -> ScrapResult: @@ -469,7 +467,7 @@ def scrap_20(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_21(url, cid) -> ScrapResult: @@ -490,7 +488,7 @@ def scrap_21(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_22(url, cid) -> ScrapResult: @@ -511,7 +509,7 @@ def scrap_22(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_23(url, cid) -> ScrapResult: @@ -534,7 +532,7 @@ def scrap_23(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_24(url, cid) -> ScrapResult: @@ -553,7 +551,7 @@ def scrap_24(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_25(url, cid) -> ScrapResult: @@ -574,7 +572,7 @@ def scrap_25(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": diff --git a/scrap/local_councils/ulsan.py b/scrap/local_councils/ulsan.py index d74692b..de7a482 100644 --- a/scrap/local_councils/ulsan.py +++ b/scrap/local_councils/ulsan.py @@ -24,7 +24,7 @@ def scrap_70(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_71(url, cid) -> ScrapResult: @@ -51,7 +51,7 @@ def scrap_71(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_72(url, cid) -> ScrapResult: @@ -71,7 +71,7 @@ def scrap_72(url, cid) -> ScrapResult: party = party_info[0].get_text(strip=True).split(": ")[1] councilors.append(Councilor(name=name, party=party)) - return 
returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_73(url, cid) -> ScrapResult: @@ -91,7 +91,7 @@ def scrap_73(url, cid) -> ScrapResult: party = party_info[0].get_text(strip=True).split(": ")[1] councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_74(url, cid) -> ScrapResult: @@ -118,7 +118,7 @@ def scrap_74(url, cid) -> ScrapResult: councilors.append(Councilor(name=name, party=party)) - return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) if __name__ == "__main__": From 81ed3359065b6c297731a1e411c6b251c7999bd3 Mon Sep 17 00:00:00 2001 From: Re-st Date: Wed, 8 Nov 2023 12:18:55 +0000 Subject: [PATCH 11/12] Formatted with black --- scrap/local_councils/__init__.py | 1 + scrap/local_councils/busan.py | 1 + scrap/local_councils/chungcheong.py | 22 ++++++++-------------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py index 5868716..4aed994 100644 --- a/scrap/local_councils/__init__.py +++ b/scrap/local_councils/__init__.py @@ -10,6 +10,7 @@ from scrap.utils.types import CouncilType from scrap.utils.utils import getPartyList + def ret_local_councilors(cid, councilors): return ScrapResult( council_id=cid, diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 8240415..7432d22 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -4,6 +4,7 @@ from scrap.local_councils import * from scrap.local_councils.basic import ret_local_councilors + def scrap_26(url, cid) -> ScrapResult: """부산 중구""" soup = get_soup(url, verify=False) diff --git a/scrap/local_councils/chungcheong.py b/scrap/local_councils/chungcheong.py index 3557d3b..340a25d 100644 --- a/scrap/local_councils/chungcheong.py +++ b/scrap/local_councils/chungcheong.py @@ -1,9 +1,9 @@ from scrap.local_councils import * from scrap.local_councils.basic import * + def scrap_124(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청북도 청주시 - """ + """충청북도 청주시""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -26,8 +26,7 @@ def scrap_124(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: def scrap_125(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청북도 충주시 - """ + """충청북도 충주시""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -49,8 +48,7 @@ def scrap_125(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: def scrap_126(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청북도 제천시 - """ + """충청북도 제천시""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -71,8 +69,7 @@ def scrap_126(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: def scrap_132(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청북도 제천시 - """ + """충청북도 제천시""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] @@ -94,8 +91,7 @@ def scrap_132(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: def scrap_134(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: - """충청북도 증평군 - """ + """충청북도 증평군""" soup = get_soup(url, verify=False, encoding="euc-kr") councilors: List[Councilor] = [] @@ -117,10 +113,8 @@ def scrap_134(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: return ret_local_councilors(cid, councilors) -def scrap_140(url, cid, args: ScrapBasicArgument = None -) -> ScrapResult: - """충청남도 태안군 - 
""" +def scrap_140(url, cid, args: ScrapBasicArgument = None) -> ScrapResult: + """충청남도 태안군""" soup = get_soup(url, verify=False) councilors: List[Councilor] = [] From 709e115492df272139feb7ec55ade0e15f94496e Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Sun, 12 Nov 2023 22:40:29 +0900 Subject: [PATCH 12/12] =?UTF-8?q?[scrap]=20JS=20=EA=B8=B0=EB=B0=98=20?= =?UTF-8?q?=EC=8A=A4=ED=81=AC=EB=9E=A9=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/group_head.py | 12 ------ scrap/local_councils/gangwon.py | 18 +------- scrap/local_councils/incheon.py | 75 ++++++++++++++++----------------- scrap/local_councils/jeolla.py | 43 ++++++++++++------- 4 files changed, 66 insertions(+), 82 deletions(-) diff --git a/scrap/group_head.py b/scrap/group_head.py index 40f44da..1e2009a 100644 --- a/scrap/group_head.py +++ b/scrap/group_head.py @@ -20,18 +20,6 @@ def scrap_group_heads( browser = get_selenium(url) - # driver_loc = os.popen("which chromedriver").read().strip() - # if len(driver_loc) == 0: - # raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - - # chrome_options = Options() - # chrome_options.add_argument("--headless") - # chrome_options.add_argument("--no-sandbox") - - # webdriver_service = Service(driver_loc) - # browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) - # browser.get(url) - areas = [ tag.text.strip() for tag in browser.find_element( diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index fd45130..6225610 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -1,10 +1,7 @@ import os -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.common.by import By -from selenium.webdriver.chrome.options import Options from scrap.local_councils.basic import * +from scrap.utils.requests import get_selenium, By from scrap.utils.utils import getPartyList party_keywords = getPartyList() @@ -17,18 +14,7 @@ def scrap_107( ) -> ScrapResult: """강원도 원주시""" councilors: list[Councilor] = [] - - driver_loc = os.popen("which chromedriver").read().strip() - if len(driver_loc) == 0: - raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - - chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--no-sandbox") - - webdriver_service = Service(driver_loc) - browser = webdriver.Chrome(service=webdriver_service, options=chrome_options) - browser.get(url) + browser = get_selenium(url) pfs_wrapper = browser.find_element(By.CSS_SELECTOR, "div[id='content']") councilor_infos = pfs_wrapper.find_elements(By.CSS_SELECTOR, "dl") diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index da3601d..2e4b87e 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -27,32 +27,34 @@ def scrap_50(url, cid) -> ScrapResult: def scrap_51(url, cid) -> ScrapResult: """인천 동구""" - raise Exception("현재 인천 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") - - # soup = get_soup(url, verify=False) - # councilors: list[Councilor] = [] - + browser = get_selenium(url) + councilors: list[Councilor] = [] -# # 프로필 링크 스크랩을 위해 base_url 추출 -# parsed_url = urlparse(url) -# base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" + cur_win = browser.current_window_handle -# for name_tag in soup.find_all('strong', class_='name'): -# name = name_tag.get_text(strip=True) -# party = '정당 정보 없음' + for profile in 
browser.find_elements(By.CSS_SELECTOR, "dl[class='profile']"): + name_tag = profile.find_element(By.CSS_SELECTOR, "strong[class='name']") + name = name_tag.text.strip() if name_tag else "이름 정보 없음" -# profile_link = name_tag.find_next('a', class_='abtn1') -# if profile_link: -# profile_url = base_url + profile_link['onclick'][13:104] -# profile_soup = get_soup(profile_url, verify=False) + party = "정당 정보 없음" + profile_link = profile.find_element(By.TAG_NAME, "a") + if profile_link: + profile_link.click() + browser.switch_to.window( + [win for win in browser.window_handles if win != cur_win][0] + ) + party_tag = browser.find_elements(By.CSS_SELECTOR, "span[class='detail']")[ + 1 + ] + if party_tag: + party = party_tag.text.strip() -# party_info = profile_soup.find('span', class_='subject', string='소속정당') -# if party_info and (party_span := party_info.find_next('span', class_='detail')) is not None: -# party = party_span.get_text(strip=True) + councilors.append(Councilor(name, party)) -# councilors.append(Councilor(name=name, party=party)) + browser.close() + browser.switch_to.window(cur_win) -# return returncouncilors(cid, councilors) + return ret_local_councilors(cid, councilors) def scrap_52(url, cid) -> ScrapResult: @@ -121,27 +123,22 @@ def scrap_54(url, cid) -> ScrapResult: def scrap_55(url, cid) -> ScrapResult: """인천 부평구""" - raise Exception("현재 인천 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다") - - # soup = get_soup(url, verify=False) - # councilors: list[Councilor] = [] - - # for profile in soup.find_all('div', class_='profile'): - # name_tag = profile.find('strong', class_='name') - # name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else '이름 정보 없음' + browser = get_selenium(url) + councilors: list[Councilor] = [] - # party = '정당 정보 없음' - # party_info = profile.find('strong', string='소속정당').find_next('span') - # if party_info: - # party = party_info.get_text(strip=True).split()[-1].strip() + for profile in browser.find_elements(By.CSS_SELECTOR, "dl[class='profile']"): + name_tag = profile.find_element(By.CSS_SELECTOR, "strong[class='name']") + name = name_tag.text.strip().split()[0].strip() if name_tag else "이름 정보 없음" - # councilors.append(Councilor(name=name, party=party)) + party_tag = profile.find_elements(By.TAG_NAME, "li")[2] + party = ( + party_tag.find_element(By.TAG_NAME, "span").text.strip().split()[-1].strip() + if party_tag + else "정당 정보 없음" + ) + councilors.append(Councilor(name, party)) - # return returncouncilors(cid, councilors) - # council_id=55, - # council_type=CouncilType.LOCAL_COUNCIL, - # councilors=councilors - # ) + return ret_local_councilors(cid, councilors) def scrap_56(url, cid) -> ScrapResult: @@ -201,4 +198,4 @@ def scrap_57(url, args) -> ScrapResult: if __name__ == "__main__": - print(scrap_52()) + print(scrap_51("https://council.icdonggu.go.kr/korean/member/active", 51)) diff --git a/scrap/local_councils/jeolla.py b/scrap/local_councils/jeolla.py index a3f9f06..d8630f8 100644 --- a/scrap/local_councils/jeolla.py +++ b/scrap/local_councils/jeolla.py @@ -1,4 +1,5 @@ from scrap.local_councils import * +from scrap.utils.requests import get_selenium, By def scrap_154( @@ -86,24 +87,36 @@ def scrap_157( return ret_local_councilors(cid, councilors) -def scrap_160( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: +def scrap_160(url, cid) -> ScrapResult: """전라북도 임실군""" - # TODO: js로 동적으로 읽어옴 - raise NotImplementedError + browser = get_selenium(url) + councilors: list[Councilor] = [] + for profile in 
browser.find_elements(By.CSS_SELECTOR, "div[class='col-lg-6']"): + name_tag = profile.find_element(By.TAG_NAME, "strong") + name = name_tag.text.strip() if name_tag else "이름 정보 없음" -def scrap_161( - url, - cid, - args: ScrapBasicArgument = None, -) -> ScrapResult: + party = "정당 정보 없음" + councilors.append(Councilor(name, party)) + + return ret_local_councilors(cid, councilors) + + +def scrap_161(url, cid) -> ScrapResult: """전라북도 순창군""" - # TODO: js로 동적으로 읽어옴 - raise NotImplementedError + browser = get_selenium(url) + councilors: list[Councilor] = [] + + for profile in browser.find_elements(By.CSS_SELECTOR, "div[class='con']"): + name_tag = profile.find_element(By.TAG_NAME, "strong") + name = name_tag.text.strip()[:-2].strip() if name_tag else "이름 정보 없음" + + party_tag = profile.find_elements(By.TAG_NAME, "dd")[1] + party = party_tag.text.strip() if party_tag else "정당 정보 없음" + + councilors.append(Councilor(name, party)) + + return ret_local_councilors(cid, councilors) def scrap_162( @@ -226,4 +239,4 @@ def scrap_167( if __name__ == "__main__": - print(scrap_167()) + print(scrap_161("https://www.sunchangcouncil.go.kr/main/contents/lawmaker", 161))
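
Note on get_selenium: every patch in this series funnels browser startup through get_selenium in scrap/utils/requests.py, but the body of that helper never appears in the diffs above. Below is a minimal sketch of what it presumably contains, reconstructed from the per-scraper boilerplate it replaces in group_head.py, busan.py, and gangwon.py: the chromedriver lookup, the --headless and --no-sandbox options, and the By re-export are taken from that removed code, while everything else (exact structure, error handling) is an assumption.

import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By  # re-exported so scrapers can import By from here


def get_selenium(url):
    # Same chromedriver lookup the scrapers previously did inline.
    driver_loc = os.popen("which chromedriver").read().strip()
    if len(driver_loc) == 0:
        raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.")

    # Headless Chrome options copied from the removed boilerplate.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")

    # Selenium 4 style: the driver path is passed via a Service object.
    webdriver_service = Service(driver_loc)
    browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
    browser.get(url)
    return browser

Whatever the real body looks like, the call sites above rely only on the returned driver having already loaded url, which is why each rewritten scraper can begin with browser.find_elements immediately; scrapers that navigate further, like scrap_51 with its profile pop-ups, still manage their own window handles.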