Skip to content

Commit

Permalink
Merge pull request #43 from pingpingy1/main
Browse files Browse the repository at this point in the history
[Scrap] 단체장 이름 스크랩
  • Loading branch information
Re-st authored Oct 23, 2023
2 parents 2854e7c + 1f2f145 commit 5034030
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 0 deletions.
112 changes: 112 additions & 0 deletions scrap/group_head.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from time import sleep
import os


def scrap_metro_head(
id,
metropolis,
url="https://laiis.go.kr/lips/mlo/lcl/groupHeadList.do",
) -> ScrapResult:
"""내고장알리미를 이용해 광역단체장 인적사항 스크랩
:param id: 광역자치단체 id
:param metropolis: 특별시, 광역시, 혹은 도
:param url: 내고장알리미의 지자체 단체장 목록 사이트
:return: 국회의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""

driver_loc = os.popen("which chromedriver").read().strip()
if len(driver_loc) == 0:
raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.")

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")

webdriver_service = Service(driver_loc)
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
browser.get(url)

link = browser.find_element(
By.CSS_SELECTOR, f'li[data-areaname="{metropolis}"]'
).find_element(By.TAG_NAME, "a")
link.click()
sleep(3)

profile = browser.find_element(By.CSS_SELECTOR, "div[class='head_txt_box']")
name_tag = profile.find_element(
By.CSS_SELECTOR, "p[class='text_align_center fs_18']"
)
name = name_tag.text.strip() if name_tag else "이름 정보 없음"
party = "정당 정보 없음"

browser.quit()
return ScrapResult(
council_id=id,
council_type=CouncilType.LOCAL_LEADER,
councilors=[Councilor(name=name, party=party)],
)


def scrap_local_head(
id,
metropolis,
town,
url="https://laiis.go.kr/lips/mlo/lcl/groupHeadList.do",
) -> ScrapResult:
"""내고장알리미를 이용해 기초단체장 인적사항 스크랩
:param id: 기초자치단체 id
:param metropolis: 특별시, 광역시, 혹은 도
:param town: 시, 군, 구
:param url: 내고장알리미의 지자체 단체장 목록 사이트
:return: 국회의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""

driver_loc = os.popen("which chromedriver").read().strip()
if len(driver_loc) == 0:
raise Exception("ChromeDriver를 다운로드한 후 다시 시도해주세요.")

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")

webdriver_service = Service(driver_loc)
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
browser.get(url)

link = browser.find_element(
By.CSS_SELECTOR, f'li[data-areaname="{metropolis}"]'
).find_element(By.TAG_NAME, "a")
link.click()
sleep(3)

for profile in browser.find_elements(By.CSS_SELECTOR, "div[class='head_txt_box']"):
if profile.find_element(
By.CSS_SELECTOR, "p[class='text_align_center fs_14']"
).text.startswith(town):
name_tag = profile.find_element(
By.CSS_SELECTOR, "p[class='text_align_center fs_18']"
)
name = name_tag.text.strip() if name_tag else "이름 정보 없음"
party = "정당 정보 없음"

browser.quit()
return ScrapResult(
council_id=id,
council_type=CouncilType.LOCAL_LEADER,
councilors=[Councilor(name=name, party=party)],
)

browser.quit()
raise Exception(f"{metropolis} {town}의 기초의회장을 찾지 못했습니다")


if __name__ == "__main__":
print(scrap_metro_head("seoul", "서울"))
print(scrap_local_head("seoul-jongno", "서울", "종로구"))
2 changes: 2 additions & 0 deletions scrap/utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class CouncilType(str, Enum):
LOCAL_COUNCIL = "local_council"
NATIONAL_COUNCIL = "national_council"
METROPOLITAN_COUNCIL = "metropolitan_council"
LOCAL_LEADER = "local_leader"
METRO_LEADER = "metro_leader"
"""
기초의회
"""
Expand Down

0 comments on commit 5034030

Please sign in to comment.