From e26eb2ff148a8425f29df5aba251d97cbae5f34a Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 8 Nov 2023 19:52:38 +0900 Subject: [PATCH] =?UTF-8?q?[Scrap]=20=EC=9D=B8=EC=B2=9C=20=EB=AF=B8?= =?UTF-8?q?=EC=B6=94=ED=99=80=EA=B5=AC=20=EC=8A=A4=ED=81=AC=EB=9E=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrap/local_councils/busan.py | 4 ---- scrap/local_councils/incheon.py | 26 +++++++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index 38d1553..05e1a08 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -427,10 +427,6 @@ def scrap_39( """ councilors: list[Councilor] = [] - driver_loc = os.popen("which chromedriver").read().strip() - if len(driver_loc) == 0: - raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.") - browser = get_selenium(url) councilor_infos = browser.find_elements(By.CSS_SELECTOR, "dl[class='info']") diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py index cb9c985..5e015ae 100644 --- a/scrap/local_councils/incheon.py +++ b/scrap/local_councils/incheon.py @@ -1,7 +1,7 @@ """인천광역시를 스크랩. 50-57번째 의회까지 있음. """ from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup +from scrap.utils.requests import get_soup, get_selenium, By from scrap.local_councils.basic import ( get_profiles, get_name, @@ -79,16 +79,24 @@ def scrap_52( :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 """ - soup = get_soup(url, verify=False) + councilors: list[Councilor] = [] + browser = get_selenium(url) - script = ( - soup.find("div", class_="contents_header") - .find_next("script") - .get_text(strip=True) - ) + for profile in browser.find_elements(By.CSS_SELECTOR, "div[class='career_item']"): + name_tag = profile.find_element( + By.CSS_SELECTOR, "div[class='career_item_name']" + ) + name = name_tag.text.strip().split()[0].strip() if name_tag else "이름 정보 없음" + + party_tag = profile.find_element(By.TAG_NAME, "dl") + party = ( + party_tag.find_element(By.TAG_NAME, "dd").text.strip() + if party_tag + else "정당 정보 없음" + ) - # TODO + councilors.append(Councilor(name, party)) return ScrapResult( council_id="incheon-michuholgu", @@ -257,4 +265,4 @@ def scrap_57(url, args) -> ScrapResult: if __name__ == "__main__": - print(scrap_56()) + print(scrap_52())