diff --git a/.github/workflows/format-black.yml b/.github/workflows/format-black.yml
new file mode 100644
index 0000000..94e591a
--- /dev/null
+++ b/.github/workflows/format-black.yml
@@ -0,0 +1,16 @@
+name: Format with black
+on: [push, pull_request]
+
+jobs:
+ format:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Format files using the black formatter
+ uses: rickstaa/action-black@v1
+ id: action_black
+ with:
+ black_args: "."
+ - uses: stefanzweifel/git-auto-commit-action@v5
+ with:
+ commit_message: Formatted with black
\ No newline at end of file
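
The job above runs rickstaa/action-black over the repository and commits whatever changed via stefanzweifel/git-auto-commit-action. The quote normalization seen throughout the diffs below can be reproduced locally with black's Python API; a minimal sketch, assuming black is installed (pip install black):

import black

# black normalizes string quotes: single quotes become double quotes,
# which accounts for the bulk of the changes in this pull request.
src = "params = {'pageNo': '1', 'numOfRows': '1000'}\n"
print(black.format_str(src, mode=black.Mode()))
# -> params = {"pageNo": "1", "numOfRows": "1000"}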
diff --git a/API/__init__.py b/API/__init__.py
index 6a8f860..106136f 100644
--- a/API/__init__.py
+++ b/API/__init__.py
@@ -1,3 +1,3 @@
"""
A package for collecting data through the Open Data Portal (data.go.kr) API.
-"""
\ No newline at end of file
+"""
diff --git a/API/candidate.py b/API/candidate.py
index 1e2c311..464da71 100644
--- a/API/candidate.py
+++ b/API/candidate.py
@@ -6,24 +6,24 @@
from configurations.secrets import OpenDataPortalSecrets
BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir)
-base_url = 'http://apis.data.go.kr/9760000/PofelcddInfoInqireService/getPofelcddRegistSttusInfoInqire'
+base_url = "http://apis.data.go.kr/9760000/PofelcddInfoInqireService/getPofelcddRegistSttusInfoInqire"
page_no = 1
num_of_rows = 10000
parliamentVote = [20220601, 20230405]
-sgCodes = input("Input the number of sgTypecode: ").split(',')
+sgCodes = input("Input the number of sgTypecode: ").split(",")
data_list = []
for sgId in parliamentVote:
for code in sgCodes:
params = {
- 'serviceKey': OpenDataPortalSecrets.service_key,
- 'pageNo': str(page_no),
- 'numOfRows': str(num_of_rows),
- 'sgId': str(sgId),
- 'sgTypecode': str(code),
- 'sggName': '',
- 'sdName': '',
- 'jdName': ''
+ "serviceKey": OpenDataPortalSecrets.service_key,
+ "pageNo": str(page_no),
+ "numOfRows": str(num_of_rows),
+ "sgId": str(sgId),
+ "sgTypecode": str(code),
+ "sggName": "",
+ "sdName": "",
+ "jdName": "",
}
response = requests.get(base_url, params=params)
@@ -34,56 +34,58 @@
root = ET.fromstring(response.content)
for item in root.findall(".//item"):
- sgId = item.find('sgId').text
- sggName = item.find('sggName').text
- sdName = item.find('sdName').text
- wiwName = item.find('wiwName').text
- giho = item.find('giho').text
- jdName = item.find('jdName').text
- name = item.find('name').text
- hanjaName = item.find('hanjaName').text
- gender = item.find('gender').text
- birthday = item.find('birthday').text
- age = item.find('age').text
- addr = item.find('addr').text
- jobId = item.find('jobId').text
- job = item.find('job').text
- eduId = item.find('eduId').text
- edu = item.find('edu').text
- career1 = item.find('career1').text
- career2 = item.find('career2').text
- status = item.find('status').text
+ sgId = item.find("sgId").text
+ sggName = item.find("sggName").text
+ sdName = item.find("sdName").text
+ wiwName = item.find("wiwName").text
+ giho = item.find("giho").text
+ jdName = item.find("jdName").text
+ name = item.find("name").text
+ hanjaName = item.find("hanjaName").text
+ gender = item.find("gender").text
+ birthday = item.find("birthday").text
+ age = item.find("age").text
+ addr = item.find("addr").text
+ jobId = item.find("jobId").text
+ job = item.find("job").text
+ eduId = item.find("eduId").text
+ edu = item.find("edu").text
+ career1 = item.find("career1").text
+ career2 = item.find("career2").text
+ status = item.find("status").text
- data_list.append({
- 'sgId': sgId,
- 'sggName': sggName,
- 'sdName': sdName,
- 'wiwName': wiwName,
- 'giho': giho,
- 'jdName': jdName,
- 'name': name,
- 'hanjaName': hanjaName,
- 'gender': gender,
- 'birthday': birthday,
- 'age': age,
- 'addr': addr,
- 'jobId': jobId,
- 'job': job,
- 'eduId': eduId,
- 'edu': edu,
- 'career1': career1,
- 'career2': career2,
- 'status': status
- })
+ data_list.append(
+ {
+ "sgId": sgId,
+ "sggName": sggName,
+ "sdName": sdName,
+ "wiwName": wiwName,
+ "giho": giho,
+ "jdName": jdName,
+ "name": name,
+ "hanjaName": hanjaName,
+ "gender": gender,
+ "birthday": birthday,
+ "age": age,
+ "addr": addr,
+ "jobId": jobId,
+ "job": job,
+ "eduId": eduId,
+ "edu": edu,
+ "career1": career1,
+ "career2": career2,
+ "status": status,
+ }
+ )
# Create a DataFrame from the collected data
df = pd.DataFrame(data_list)
# Save the DataFrame to an Excel file
-directory_path = os.path.join(BASE_DIR, 'output')
+directory_path = os.path.join(BASE_DIR, "output")
if not os.path.exists(directory_path):
os.makedirs(directory_path)
-excel_file = '[후보][구시군의회의원].xlsx'
+excel_file = "[후보][구시군의회의원].xlsx"
df.to_excel(os.path.join(directory_path, excel_file), index=False)
-print(f'Data has been saved to {excel_file}')
+print(f"Data has been saved to {excel_file}")
diff --git a/API/elected.py b/API/elected.py
index 98311e6..38a6030 100644
--- a/API/elected.py
+++ b/API/elected.py
@@ -6,26 +6,34 @@
from configurations.secrets import OpenDataPortalSecrets
BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir)
-base_url = 'http://apis.data.go.kr/9760000/WinnerInfoInqireService2/getWinnerInfoInqire'
-params ={'serviceKey' : OpenDataPortalSecrets.service_key,\
- 'pageNo' : '1', 'numOfRows' : '10', 'sgId' : '20230405', 'sgTypecode' : '2', 'sdName' : '전라북도', 'sggName' : '전주시을', 'jdName' : ''}
+base_url = "http://apis.data.go.kr/9760000/WinnerInfoInqireService2/getWinnerInfoInqire"
+params = {
+ "serviceKey": OpenDataPortalSecrets.service_key,
+ "pageNo": "1",
+ "numOfRows": "10",
+ "sgId": "20230405",
+ "sgTypecode": "2",
+ "sdName": "전라북도",
+ "sggName": "전주시을",
+ "jdName": "",
+}
page_no = 1
num_of_rows = 10000
parliamentVote = [20200415, 20210407, 20220601, 20230405]
-sgCodes = input("Input the number of sgTypecode: ").split(',')
+sgCodes = input("Input the number of sgTypecode: ").split(",")
data_list = []
for sgId in parliamentVote:
for code in sgCodes:
params = {
- 'serviceKey': OpenDataPortalSecrets.service_key,
- 'pageNo': str(page_no),
- 'numOfRows': str(num_of_rows),
- 'sgId': str(sgId),
- 'sgTypecode': str(code),
- 'sggName': '',
- 'sdName': '',
- 'jdName': ''
+ "serviceKey": OpenDataPortalSecrets.service_key,
+ "pageNo": str(page_no),
+ "numOfRows": str(num_of_rows),
+ "sgId": str(sgId),
+ "sgTypecode": str(code),
+ "sggName": "",
+ "sdName": "",
+ "jdName": "",
}
response = requests.get(base_url, params=params)
@@ -36,56 +44,58 @@
root = ET.fromstring(response.content)
for item in root.findall(".//item"):
- sgId = item.find('sgId').text
- sggName = item.find('sggName').text
- sdName = item.find('sdName').text
- wiwName = item.find('wiwName').text
- giho = item.find('giho').text
- jdName = item.find('jdName').text
- name = item.find('name').text
- hanjaName = item.find('hanjaName').text
- gender = item.find('gender').text
- birthday = item.find('birthday').text
- age = item.find('age').text
- addr = item.find('addr').text
- jobId = item.find('jobId').text
- job = item.find('job').text
- eduId = item.find('eduId').text
- edu = item.find('edu').text
- career1 = item.find('career1').text
- career2 = item.find('career2').text
+ sgId = item.find("sgId").text
+ sggName = item.find("sggName").text
+ sdName = item.find("sdName").text
+ wiwName = item.find("wiwName").text
+ giho = item.find("giho").text
+ jdName = item.find("jdName").text
+ name = item.find("name").text
+ hanjaName = item.find("hanjaName").text
+ gender = item.find("gender").text
+ birthday = item.find("birthday").text
+ age = item.find("age").text
+ addr = item.find("addr").text
+ jobId = item.find("jobId").text
+ job = item.find("job").text
+ eduId = item.find("eduId").text
+ edu = item.find("edu").text
+ career1 = item.find("career1").text
+ career2 = item.find("career2").text
# status = item.find('status').text
- data_list.append({
- 'sgId': sgId,
- 'sggName': sggName,
- 'sdName': sdName,
- 'wiwName': wiwName,
- 'giho': giho,
- 'jdName': jdName,
- 'name': name,
- 'hanjaName': hanjaName,
- 'gender': gender,
- 'birthday': birthday,
- 'age': age,
- 'addr': addr,
- 'jobId': jobId,
- 'job': job,
- 'eduId': eduId,
- 'edu': edu,
- 'career1': career1,
- 'career2': career2,
- # 'status': status
- })
+ data_list.append(
+ {
+ "sgId": sgId,
+ "sggName": sggName,
+ "sdName": sdName,
+ "wiwName": wiwName,
+ "giho": giho,
+ "jdName": jdName,
+ "name": name,
+ "hanjaName": hanjaName,
+ "gender": gender,
+ "birthday": birthday,
+ "age": age,
+ "addr": addr,
+ "jobId": jobId,
+ "job": job,
+ "eduId": eduId,
+ "edu": edu,
+ "career1": career1,
+ "career2": career2,
+ # 'status': status
+ }
+ )
# Create a DataFrame from the collected data
df = pd.DataFrame(data_list)
# Save the DataFrame to an Excel file
-directory_path = os.path.join(BASE_DIR, 'output')
+directory_path = os.path.join(BASE_DIR, "output")
if not os.path.exists(directory_path):
os.makedirs(directory_path)
-excel_file = '[당선][구시군의회의원].xlsx'
+excel_file = "[당선][구시군의회의원].xlsx"
df.to_excel(os.path.join(directory_path, excel_file), index=False)
-print(f'Data has been saved to {excel_file}')
\ No newline at end of file
+print(f"Data has been saved to {excel_file}")
diff --git a/API/votecode.py b/API/votecode.py
index b9f7e05..d040987 100644
--- a/API/votecode.py
+++ b/API/votecode.py
@@ -6,32 +6,39 @@
import argparse
parser = argparse.ArgumentParser()
-parser.add_argument('-c', '--code', action='store_true', help='코드를 출력합니다.')
+parser.add_argument("-c", "--code", action="store_true", help="코드를 출력합니다.")
args = parser.parse_args()
if args.code:
- print("(0) 대표선거명 (1)대통령,(2)국회의원,(3)시도지사,(4)구시군장,(5)시도의원,\
- (6)구시군의회의원, (7)국회의원비례대표,(8)광역의원비례대표,(9)기초의원비례대표,(10)교육의원,(11)교육감")
+ print(
+ "(0) 대표선거명 (1)대통령,(2)국회의원,(3)시도지사,(4)구시군장,(5)시도의원,\
+ (6)구시군의회의원, (7)국회의원비례대표,(8)광역의원비례대표,(9)기초의원비례대표,(10)교육의원,(11)교육감"
+ )
else:
- print("sgTypecode를 입력하면 해당 sgTypecode와 일치하는 sgId 값을 출력합니다. 여러 개 입력하고 싶으면 ,로 구분해 주세요.")
+ print(
+ "sgTypecode를 입력하면 해당 sgTypecode와 일치하는 sgId 값을 출력합니다. 여러 개 입력하고 싶으면 ,로 구분해 주세요."
+ )
-url = 'http://apis.data.go.kr/9760000/CommonCodeService/getCommonSgCodeList'
-params ={'serviceKey' : OpenDataPortalSecrets.service_key,\
- 'pageNo' : '1', 'numOfRows' : '1000'}
+url = "http://apis.data.go.kr/9760000/CommonCodeService/getCommonSgCodeList"
+params = {
+ "serviceKey": OpenDataPortalSecrets.service_key,
+ "pageNo": "1",
+ "numOfRows": "1000",
+}
response = requests.get(url, params=params)
-xml_data = response.content.decode('utf-8')
+xml_data = response.content.decode("utf-8")
# Parse the XML data
root = ET.fromstring(xml_data)
# Find all elements whose sgTypecode matches one of the input codes and extract their sgId values
sgIds = set()
-for code in input("Input the number of sgTypecode: ").split(','):
- for item in root.findall(f'.//item[sgTypecode=\"{code}\"]'):
- sgId_element = item.find('sgId')
+for code in input("Input the number of sgTypecode: ").split(","):
+ for item in root.findall(f'.//item[sgTypecode="{code}"]'):
+ sgId_element = item.find("sgId")
if sgId_element is not None:
sgId = sgId_element.text
sgIds.add(sgId)
# Print the sgId values
for sgId in sorted(sgIds):
- print(sgId)
\ No newline at end of file
+ print(sgId)
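
The path .//item[sgTypecode="{code}"] used above relies on ElementTree's text-equality predicate: it selects <item> elements having a child <sgTypecode> whose text equals the quoted value. A self-contained check of that behavior:

import xml.etree.ElementTree as ET

xml = """<response><body><items>
  <item><sgId>20230405</sgId><sgTypecode>2</sgTypecode></item>
  <item><sgId>20220601</sgId><sgTypecode>4</sgTypecode></item>
</items></body></response>"""

root = ET.fromstring(xml)
for item in root.findall('.//item[sgTypecode="2"]'):
    print(item.findtext("sgId"))  # prints 20230405 only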
diff --git a/__init__.py b/__init__.py
index d2639ae..20221e2 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,3 +1,3 @@
"""
This file allows the project root folder to be recognized as a package.
-"""
\ No newline at end of file
+"""
diff --git a/configurations/__init__.py b/configurations/__init__.py
index e88c5ca..21135bf 100644
--- a/configurations/__init__.py
+++ b/configurations/__init__.py
@@ -1,4 +1,4 @@
"""
Defines the environment variables needed to run the scripts.
Environment variables can be loaded by creating a .env file in the project root folder.
-"""
\ No newline at end of file
+"""
diff --git a/configurations/secrets.py b/configurations/secrets.py
index f8adc5b..2bcdfc7 100644
--- a/configurations/secrets.py
+++ b/configurations/secrets.py
@@ -5,22 +5,25 @@
from dotenv import load_dotenv
# Load environment variables from the .env file.
-load_dotenv(
- verbose=False,
- override=False
-)
+load_dotenv(verbose=False, override=False)
+
class MongoDBSecrets:
"""
Defines the connection information used to connect to MongoDB.
"""
- connection_uri = str(os.getenv("MONGO_CONNECTION_URI") or "mongodb://localhost:27017")
+
+ connection_uri = str(
+ os.getenv("MONGO_CONNECTION_URI") or "mongodb://localhost:27017"
+ )
"""PyMongo 클라이언트에서 데이터베이스 연결에 사용할 연결 uri입니다."""
database_name = str(os.getenv("MONGO_DATABASE") or "local")
"""PyMongo 클라이언트에서 사용할 데이터베이스 이름입니다."""
+
class OpenDataPortalSecrets:
"""
Defines the service key needed to call the Open Data Portal (data.go.kr) APIs.
"""
- service_key = str(os.getenv("OPEN_DATA_SERICE_KEY") or "")
\ No newline at end of file
+
+ service_key = str(os.getenv("OPEN_DATA_SERICE_KEY") or "")
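
For reference, a .env file in the project root matching the loader above would look like this sketch (values are placeholders). Note that the code reads OPEN_DATA_SERICE_KEY exactly as spelled, so the .env entry must use the same spelling:

MONGO_CONNECTION_URI=mongodb://localhost:27017
MONGO_DATABASE=local
OPEN_DATA_SERICE_KEY=your-service-key-from-data.go.kr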
diff --git a/db/__init__.py b/db/__init__.py
index b528b32..c2cab83 100644
--- a/db/__init__.py
+++ b/db/__init__.py
@@ -1,3 +1,3 @@
"""
Defines the MongoDB client object and functions that put values into and pull values out of the database.
-"""
\ No newline at end of file
+"""
diff --git a/db/client.py b/db/client.py
index 3d83d42..194119e 100644
--- a/db/client.py
+++ b/db/client.py
@@ -4,4 +4,4 @@
client = pymongo.MongoClient(MongoDBSecrets.connection_uri)
"""
The MongoDB client object.
-"""
\ No newline at end of file
+"""
diff --git a/scrap/__init__.py b/scrap/__init__.py
index bb9b620..ae277ea 100644
--- a/scrap/__init__.py
+++ b/scrap/__init__.py
@@ -1,3 +1,3 @@
"""
A package gathering the files used to crawl local councils.
-"""
\ No newline at end of file
+"""
diff --git a/scrap/examples/__init__.py b/scrap/examples/__init__.py
index fd09f19..ab05e7a 100644
--- a/scrap/examples/__init__.py
+++ b/scrap/examples/__init__.py
@@ -1,3 +1,3 @@
"""
A folder gathering example files.
-"""
\ No newline at end of file
+"""
diff --git a/scrap/examples/database.py b/scrap/examples/database.py
index 371b168..d8c3dfe 100644
--- a/scrap/examples/database.py
+++ b/scrap/examples/database.py
@@ -3,7 +3,12 @@
"""
from scrap.utils.database import save_to_database
-from scrap.local_councils.seoul import scrap_dongdaemungu, scrap_gwangjingu, scrap_junggu
+from scrap.local_councils.seoul import (
+ scrap_dongdaemungu,
+ scrap_gwangjingu,
+ scrap_junggu,
+)
+
def main() -> None:
# Save the crawling results for the Seoul Dongdaemun-gu council to the database.
@@ -13,5 +18,6 @@ def main() -> None:
# Save the crawling results for the Seoul Jung-gu council to the database.
save_to_database(scrap_junggu())
-if __name__ == '__main__':
- main()
\ No newline at end of file
+
+if __name__ == "__main__":
+ main()
diff --git a/scrap/examples/junggu_scrap.py b/scrap/examples/junggu_scrap.py
index 7416668..17b74cf 100644
--- a/scrap/examples/junggu_scrap.py
+++ b/scrap/examples/junggu_scrap.py
@@ -10,23 +10,29 @@
full_url = base_url + link
response = requests.get(full_url, verify=False)
-soup = BeautifulSoup(response.text, 'html.parser')
+soup = BeautifulSoup(response.text, "html.parser")
-profiles = soup.find_all('div', class_='profile')
+profiles = soup.find_all("div", class_="profile")
for profile in profiles:
- name = profile.find('em', class_='name').text
- party = profile.find('ul', class_='dot').find('li').find_next_sibling('li').find('span').text
-
+ name = profile.find("em", class_="name").text
+ party = (
+ profile.find("ul", class_="dot")
+ .find("li")
+ .find_next_sibling("li")
+ .find("span")
+ .text
+ )
+
# Get the profile-view link
- profile_link = profile.find('a', class_='start')
+ profile_link = profile.find("a", class_="start")
if profile_link:
- profile_url = base_url + profile_link['href']
-
+ profile_url = base_url + profile_link["href"]
+
# Go to the profile page
profile_response = requests.get(profile_url, verify=False)
- profile_soup = BeautifulSoup(profile_response.text, 'html.parser')
-
+ profile_soup = BeautifulSoup(profile_response.text, "html.parser")
+
# Extract and print the desired information from the profile page
# Write the extraction code here, depending on which fields are needed.
@@ -34,10 +40,8 @@
# print('프로필 페이지 URL:', profile_url)
# print('---')
# "소속정당" 정보 추출
- party_info = profile_soup.find('em', text='소속정당 : ')
- party = party_info.find_next('span').string if party_info else '정당 정보 없음'
-
- print('이름:', name)
- print('정당:', party)
-
+ party_info = profile_soup.find("em", text="소속정당 : ")
+ party = party_info.find_next("span").string if party_info else "정당 정보 없음"
+ print("이름:", name)
+ print("정당:", party)
diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py
index d4f68ed..a4e1fc8 100644
--- a/scrap/local_councils/__init__.py
+++ b/scrap/local_councils/__init__.py
@@ -4,4 +4,4 @@
"""
from .daejeon import *
from .ulsan import *
-from .basic import *
\ No newline at end of file
+from .basic import *
diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py
index 0d52ccb..ec74750 100644
--- a/scrap/local_councils/basic.py
+++ b/scrap/local_councils/basic.py
@@ -7,9 +7,9 @@
import requests
import copy
-regex_pattern = re.compile(r'정\s*\S*\s*당', re.IGNORECASE) # Case-insensitive
+regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE) # Case-insensitive
party_keywords = getPartyList()
-party_keywords.append('무소속')
+party_keywords.append("무소속")
def find(soup, element, class_):
@@ -30,10 +30,9 @@ def get_profiles(soup, element, class_, memberlistelement, memberlistclass_):
# Fetch councilor profiles from the councilor list page
if memberlistelement is not None:
try:
- soup = find_all(soup, memberlistelement,
- class_=memberlistclass_)[0]
+ soup = find_all(soup, memberlistelement, class_=memberlistclass_)[0]
except Exception:
- raise RuntimeError('[basic.py] 의원 목록 사이트에서 의원 프로필을 가져오는데 실패했습니다.')
+ raise RuntimeError("[basic.py] 의원 목록 사이트에서 의원 프로필을 가져오는데 실패했습니다.")
return find_all(soup, element, class_)
@@ -41,7 +40,10 @@ def getDataFromAPI(url_format, data_uid, name_id, party_id) -> Councilor:
# Fetch councilor info from the API
url = url_format.format(data_uid)
result = requests.get(url).json()
- return Councilor(name=result[name_id] if result[name_id] else '이름 정보 없음', party=result[party_id] if result[party_id] else '정당 정보 없음')
+ return Councilor(
+ name=result[name_id] if result[name_id] else "이름 정보 없음",
+ party=result[party_id] if result[party_id] else "정당 정보 없음",
+ )
def get_name(profile, element, class_, wrapper_element, wrapper_class_):
@@ -49,29 +51,28 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_):
if wrapper_element is not None:
profile = find_all(profile, wrapper_element, class_=wrapper_class_)[0]
name_tag = find(profile, element, class_)
- if name_tag.find('span'):
+ if name_tag.find("span"):
name_tag = copy.copy(name_tag)
# Remove everything inside the span tags
- for span in name_tag.find_all('span'):
+ for span in name_tag.find_all("span"):
span.decompose()
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
+
# Cases where name is long and the actual name sits inside a strong tag (Eunpyeong-gu, Suwon-si, etc.)
if name_tag.strong is not None:
- name = name_tag.strong.get_text(
- strip=True) if name_tag.strong else "이름 정보 없음"
- name = name.split('(')[0].split(
- ':')[-1].strip() # strip the trailing hanja name and the leading '이 름:' label
- # TODO : what if a name happens to be 국회의장 or 김의원박?
+ name = name_tag.strong.get_text(strip=True) if name_tag.strong else "이름 정보 없음"
+ name = name.split("(")[0].split(":")[-1].strip() # strip the trailing hanja name and the leading '이 름:' label
+ # TODO : worth considering names that happen to contain one of the keywords below.
if len(name) > 3:
# Cases where a title is attached before or after the name
- for keyword in ['부의장', '의원', '의장']: # council 119, Gangseo-gu, etc.
+ for keyword in ["부의장", "의원", "의장"]: # council 119, Gangseo-gu, etc.
if keyword in name:
- name = name.replace(keyword, '').strip()
+ name = name.replace(keyword, "").strip()
for keyword in party_keywords:
if keyword in name: # Incheon Seo-gu, etc.
- name = name.replace(keyword, '').strip()
+ name = name.replace(keyword, "").strip()
break
- name = name.split(' ')[0] # Cases where a job title follows the name
+ name = name.split(" ")[0] # Cases where a job title follows the name
return name
@@ -89,37 +90,42 @@ def goto_profilesite(profile, wrapper_element, wrapper_class_, wrapper_txt, url)
# Get the profile-view link
profile_link = find(profile, wrapper_element, class_=wrapper_class_)
if wrapper_txt is not None:
- profile_links = find_all(profile, 'a', class_=wrapper_class_)
- profile_link = [
- link for link in profile_links if link.text == wrapper_txt][0]
+ profile_links = find_all(profile, "a", class_=wrapper_class_)
+ profile_link = [link for link in profile_links if link.text == wrapper_txt][0]
if profile_link is None:
- raise RuntimeError('[basic.py] 의원 프로필에서 프로필보기 링크를 가져오는데 실패했습니다.')
+ raise RuntimeError("[basic.py] 의원 프로필에서 프로필보기 링크를 가져오는데 실패했습니다.")
# if base_url[-1] != '/':
# base_url = base_url + '/'
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
try:
profile = get_soup(profile_url, verify=False)
except Exception:
- raise RuntimeError('[basic.py] \'//\'가 있진 않나요?', ' url: ', profile_url)
+ raise RuntimeError("[basic.py] '//'가 있진 않나요?", " url: ", profile_url)
return profile
-def get_party(profile, element, class_, wrapper_element, wrapper_class_, wrapper_txt, url):
+def get_party(
+ profile, element, class_, wrapper_element, wrapper_class_, wrapper_txt, url
+):
# Get the name of the councilor's party from the profile
if wrapper_element is not None:
profile = goto_profilesite(
- profile, wrapper_element, wrapper_class_, wrapper_txt, url)
- party_pulp_list = list(filter(lambda x: regex_pattern.search(
- str(x)), find_all(profile, element, class_)))
+ profile, wrapper_element, wrapper_class_, wrapper_txt, url
+ )
+ party_pulp_list = list(
+ filter(
+ lambda x: regex_pattern.search(str(x)), find_all(profile, element, class_)
+ )
+ )
if party_pulp_list == []:
- raise RuntimeError('[basic.py] 정당정보 regex 실패')
+ raise RuntimeError("[basic.py] 정당정보 regex 실패")
party_pulp = party_pulp_list[0]
- party_string = party_pulp.get_text(strip=True).split(' ')[-1]
+ party_string = party_pulp.get_text(strip=True).split(" ")[-1]
while True:
if (party := extract_party(party_string)) is not None:
return party
- if (party_pulp := party_pulp.find_next('span')) is not None:
- party_string = party_pulp.text.strip().split(' ')[-1]
+ if (party_pulp := party_pulp.find_next("span")) is not None:
+ party_string = party_pulp.text.strip().split(" ")[-1]
else:
return "[basic.py] 정당 정보 파싱 불가"
@@ -128,54 +134,68 @@ def get_party_easy(profile, wrapper_element, wrapper_class_, wrapper_txt, url):
# Get the name of the councilor's party from the profile
if wrapper_element is not None:
profile = goto_profilesite(
- profile, wrapper_element, wrapper_class_, wrapper_txt, url)
+ profile, wrapper_element, wrapper_class_, wrapper_txt, url
+ )
party = extract_party(profile.text)
- assert (party is not None)
+ assert party is not None
return party
-def scrap_basic(url, cid, args: ScrapBasicArgument, encoding='utf-8') -> ScrapResult:
- '''Scrape councilors' detailed profiles
+def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapResult:
+ """의원 상세약력 스크랩
:param url: URL of the councilor list page
:param cid: council id
:param encoding: encoding of the fetched soup
:return: ScrapResult object holding the councilors' name and party data
- '''
+ """
soup = get_soup(url, verify=False, encoding=encoding)
councilors: list[Councilor] = []
- profiles = get_profiles(soup, args.pf_elt, args.pf_cls,
- args.pf_memlistelt, args.pf_memlistcls)
- print(cid, '번째 의회에는,', len(profiles), '명의 의원이 있습니다.') # For debugging.
+
+ profiles = get_profiles(
+ soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
+ )
+ print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # 디버깅용.
for profile in profiles:
- name = party = ''
+ name = party = ""
try:
- name = get_name(profile, args.name_elt, args.name_cls,
- args.name_wrapelt, args.name_wrapcls)
+ name = get_name(
+ profile,
+ args.name_elt,
+ args.name_cls,
+ args.name_wrapelt,
+ args.name_wrapcls,
+ )
except Exception as e:
- raise RuntimeError(
- '[basic.py] 의원 이름을 가져오는데 실패했습니다. 이유 : ' + str(e))
+ raise RuntimeError("[basic.py] 의원 이름을 가져오는데 실패했습니다. 이유 : " + str(e))
try:
- party = get_party(profile, args.pty_elt, args.pty_cls,
- args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url)
+ party = get_party(
+ profile,
+ args.pty_elt,
+ args.pty_cls,
+ args.pty_wrapelt,
+ args.pty_wrapcls,
+ args.pty_wraptxt,
+ url,
+ )
except Exception as e:
try:
party = get_party_easy(
- profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url)
+ profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url
+ )
except Exception:
- raise RuntimeError(
- '[basic.py] 의원 정당을 가져오는데 실패했습니다. 이유: ' + str(e))
-
+ raise RuntimeError("[basic.py] 의원 정당을 가져오는데 실패했습니다. 이유: " + str(e))
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id=str(cid),
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
+if __name__ == "__main__":
args3 = ScrapBasicArgument(
- pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em')
- print(scrap_basic('https://www.yscl.go.kr/kr/member/name.do', 3, args3)) # Seoul Yongsan-gu
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ )
+ print(scrap_basic("https://www.yscl.go.kr/kr/member/name.do", 3, args3)) # 서울 용산구
diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py
index e859a82..be10dc3 100644
--- a/scrap/local_councils/busan.py
+++ b/scrap/local_councils/busan.py
@@ -4,7 +4,9 @@
from scrap.utils.requests import get_soup
-def scrap_26(url='https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil') -> ScrapResult:
+def scrap_26(
+ url="https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil",
+) -> ScrapResult:
"""부산시 중구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -13,12 +15,16 @@ def scrap_26(url='https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=B
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find('div', class_='bbs_blog council').find_all('dl'):
- name_tag = profile.find_next('dt')
- name = name_tag.get_text(strip=True).split()[-1].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find("div", class_="bbs_blog council").find_all("dl"):
+ name_tag = profile.find_next("dt")
+ name = (
+ name_tag.get_text(strip=True).split()[-1].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find_next('li')
+ party = "정당 정보 없음"
+ party_info = profile.find_next("li")
if party_info:
party = party_info.get_text(strip=True)[3:]
@@ -27,11 +33,13 @@ def scrap_26(url='https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=B
return ScrapResult(
council_id="busan-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_27(url='https://www.bsseogu.go.kr/council/board/list.bsseogu?boardId=BBS_0000097&categoryCode1=8&menuCd=DOM_000000603001000000&contentsSid=785&cpath=%2Fcouncil') -> ScrapResult:
+def scrap_27(
+ url="https://www.bsseogu.go.kr/council/board/list.bsseogu?boardId=BBS_0000097&categoryCode1=8&menuCd=DOM_000000603001000000&contentsSid=785&cpath=%2Fcouncil",
+) -> ScrapResult:
"""부산시 서구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -40,22 +48,22 @@ def scrap_27(url='https://www.bsseogu.go.kr/council/board/list.bsseogu?boardId=B
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- # Extract base_url for scraping profile links
+ # Extract base_url for scraping profile links
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in soup.find_all('div', class_='intro'):
- name_tag = profile.find_next('span').find_next('span')
+ for profile in soup.find_all("div", class_="intro"):
+ name_tag = profile.find_next("span").find_next("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
# Get the profile-view link
- profile_link = profile.find('a')
+ profile_link = profile.find("a")
if profile_link:
- profile_url = base_url + '/council' + profile_link['href']
+ profile_url = base_url + "/council" + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('span', string='소속정당')
+ party_info = profile_soup.find("span", string="소속정당")
if party_info and (party_span := party_info.parent) is not None:
party = party_span.text[4:].strip()
@@ -64,11 +72,13 @@ def scrap_27(url='https://www.bsseogu.go.kr/council/board/list.bsseogu?boardId=B
return ScrapResult(
council_id="busan-seogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_28(url='https://www.bsdonggu.go.kr/council/index.donggu?menuCd=DOM_000000502004000000') -> ScrapResult:
+def scrap_28(
+ url="https://www.bsdonggu.go.kr/council/index.donggu?menuCd=DOM_000000502004000000",
+) -> ScrapResult:
"""부산시 동구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -77,25 +87,25 @@ def scrap_28(url='https://www.bsdonggu.go.kr/council/index.donggu?menuCd=DOM_000
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='council_box'):
- name_tag = profile.find_next('span', class_='n2')
+ for profile in soup.find_all("div", class_="council_box"):
+ name_tag = profile.find_next("span", class_="n2")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find_next('span', class_='n1')
+ party = "정당 정보 없음"
+ party_info = profile.find_next("span", class_="n1")
if party_info:
- party = party_info.get_text(strip=True).split('(')[1][:-1].strip()
+ party = party_info.get_text(strip=True).split("(")[1][:-1].strip()
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="busan-donggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_29(url='https://www.yeongdo.go.kr/council/01211/01212.web') -> ScrapResult:
+def scrap_29(url="https://www.yeongdo.go.kr/council/01211/01212.web") -> ScrapResult:
"""부산시 영도구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -104,11 +114,15 @@ def scrap_29(url='https://www.yeongdo.go.kr/council/01211/01212.web') -> ScrapRe
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='even-grid gap3pct panel1 p01205bg'):
- name_tag = profile.find_next('strong', class_='h1 title')
- name = name_tag.get_text(strip=True).split(' ')[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="even-grid gap3pct panel1 p01205bg"):
+ name_tag = profile.find_next("strong", class_="h1 title")
+ name = (
+ name_tag.get_text(strip=True).split(" ")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
# TODO
councilors.append(Councilor(name=name, party=party))
@@ -116,51 +130,57 @@ def scrap_29(url='https://www.yeongdo.go.kr/council/01211/01212.web') -> ScrapRe
return ScrapResult(
council_id="busan-yeongdogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_30(url='https://council.busanjin.go.kr/content/member/member.html') -> ScrapResult:
+def scrap_30(
+ url="https://council.busanjin.go.kr/content/member/member.html",
+) -> ScrapResult:
"""부산시 부산진구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('ul', class_='mlist')
+ soup = get_soup(url, verify=False).find("ul", class_="mlist")
councilors: list[Councilor] = []
- for profile in soup.find_all('dl'):
- name_tag = profile.find('dd', class_='name')
+ for profile in soup.find_all("dl"):
+ name_tag = profile.find("dd", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find_all('b')[2]
+ party = "정당 정보 없음"
+ party_info = profile.find_all("b")[2]
if party_info:
- party = party_info.find_next('span', class_='itemContent').get_text(strip=True)
+ party = party_info.find_next("span", class_="itemContent").get_text(
+ strip=True
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="busan-busanjingu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_31(url='http://council.dongnae.go.kr/source/kr/member/active.html') -> ScrapResult:
+def scrap_31(
+ url="http://council.dongnae.go.kr/source/kr/member/active.html",
+) -> ScrapResult:
"""부산시 동래구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
:return: ScrapResult object holding the councilors' name and party data
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for name_tag in soup.find_all('li', class_='name'):
+ for name_tag in soup.find_all("li", class_="name"):
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = name_tag.find_next('li').find_next('li')
+ party = "정당 정보 없음"
+ party_info = name_tag.find_next("li").find_next("li")
if party_info:
party = party_info.get_text(strip=True).split()[-1].strip()
@@ -169,11 +189,11 @@ def scrap_31(url='http://council.dongnae.go.kr/source/kr/member/active.html') ->
return ScrapResult(
council_id="busan-dongnaegu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_32(url='https://council.bsnamgu.go.kr/kr/member/active') -> ScrapResult:
+def scrap_32(url="https://council.bsnamgu.go.kr/kr/member/active") -> ScrapResult:
"""부산시 남구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -182,25 +202,32 @@ def scrap_32(url='https://council.bsnamgu.go.kr/kr/member/active') -> ScrapResul
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('dl', class_='profile'):
- name_tag = profile.find('strong')
+ for profile in soup.find_all("dl", class_="profile"):
+ name_tag = profile.find("strong")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('span', class_='sbj', string='정 당')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", class_="sbj", string="정 당")
if party_info:
- party = party_info.find_next('span', class_='detail').get_text(strip=True).split()[-1].strip()
+ party = (
+ party_info.find_next("span", class_="detail")
+ .get_text(strip=True)
+ .split()[-1]
+ .strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="busan-namgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_33(url='https://www.bsbukgu.go.kr/council/index.bsbukgu?menuCd=DOM_000000808001001000') -> ScrapResult:
+def scrap_33(
+ url="https://www.bsbukgu.go.kr/council/index.bsbukgu?menuCd=DOM_000000808001001000",
+) -> ScrapResult:
"""부산시 북구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -209,12 +236,12 @@ def scrap_33(url='https://www.bsbukgu.go.kr/council/index.bsbukgu?menuCd=DOM_000
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('dl', class_='info'):
- name_tag = profile.find('span')
+ for profile in soup.find_all("dl", class_="info"):
+ name_tag = profile.find("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('span', string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", string="소속정당")
if party_info:
party = party_info.parent.get_text(strip=True).split()[-1].strip()
@@ -223,33 +250,35 @@ def scrap_33(url='https://www.bsbukgu.go.kr/council/index.bsbukgu?menuCd=DOM_000
return ScrapResult(
council_id="busan-bukgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_34(url='https://council.haeundae.go.kr/board/list.do?boardId=BBS_0000096&categoryCode1=08&menuCd=DOM_000000702001001000&contentsSid=330') -> ScrapResult:
+def scrap_34(
+ url="https://council.haeundae.go.kr/board/list.do?boardId=BBS_0000096&categoryCode1=08&menuCd=DOM_000000702001001000&contentsSid=330",
+) -> ScrapResult:
"""부산시 해운대구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
:return: ScrapResult object holding the councilors' name and party data
"""
- soup = get_soup(url, verify=False).find('div', class_='initial_list')
+ soup = get_soup(url, verify=False).find("div", class_="initial_list")
councilors: list[Councilor] = []
# Extract base_url for scraping profile links
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for name_tag in soup.find_all('dd'):
+ for name_tag in soup.find_all("dd"):
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
# Get the profile-view link
- profile_link = name_tag.find('a')
+ profile_link = name_tag.find("a")
if profile_link:
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('span', string='소속정당')
+ party_info = profile_soup.find("span", string="소속정당")
if party_info and (party_span := party_info.parent) is not None:
party = party_span.text[4:].strip()
@@ -258,26 +287,28 @@ def scrap_34(url='https://council.haeundae.go.kr/board/list.do?boardId=BBS_00000
return ScrapResult(
council_id="busan-haeundaegu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_35(url='https://council.gijang.go.kr/source/korean/member/active.html') -> ScrapResult:
+def scrap_35(
+ url="https://council.gijang.go.kr/source/korean/member/active.html",
+) -> ScrapResult:
"""부산시 기장군 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
:return: ScrapResult object holding the councilors' name and party data
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for profile in soup.find_all('ul', class_='wulli bul02'):
- li_tags = profile.find_all('li')
+ for profile in soup.find_all("ul", class_="wulli bul02"):
+ li_tags = profile.find_all("li")
name_tag = li_tags[0]
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
party_info = li_tags[2]
if party_info:
party = party_info.get_text(strip=True).split()[-1].strip()
@@ -287,11 +318,13 @@ def scrap_35(url='https://council.gijang.go.kr/source/korean/member/active.html'
return ScrapResult(
council_id="busan-gijanggun",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_36(url='https://www.saha.go.kr/council/congressMember/list03.do?mId=0403000000') -> ScrapResult:
+def scrap_36(
+ url="https://www.saha.go.kr/council/congressMember/list03.do?mId=0403000000",
+) -> ScrapResult:
"""부산시 사하구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -300,12 +333,12 @@ def scrap_36(url='https://www.saha.go.kr/council/congressMember/list03.do?mId=04
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for district_tag in soup.find_all('div', class_='list_member'):
- for name_tag in district_tag.find_all('h4', class_='name'):
+ for district_tag in soup.find_all("div", class_="list_member"):
+ for name_tag in district_tag.find_all("h4", class_="name"):
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = name_tag.find_next('span', string='소속당 : ')
+ party = "정당 정보 없음"
+ party_info = name_tag.find_next("span", string="소속당 : ")
if party_info:
party = party_info.parent.get_text(strip=True)[7:].strip()
@@ -314,27 +347,29 @@ def scrap_36(url='https://www.saha.go.kr/council/congressMember/list03.do?mId=04
return ScrapResult(
council_id="busan-sahagu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_37(url='https://council.geumjeong.go.kr/index.geumj?menuCd=DOM_000000716001000000') -> ScrapResult:
+def scrap_37(
+ url="https://council.geumjeong.go.kr/index.geumj?menuCd=DOM_000000716001000000",
+) -> ScrapResult:
"""부산시 금정구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
:return: ScrapResult object holding the councilors' name and party data
"""
- soup = get_soup(url, verify=False).find('div', class_='council_list')
+ soup = get_soup(url, verify=False).find("div", class_="council_list")
councilors: list[Councilor] = []
- for profile in soup.find_all('a'):
- name_tag = profile.find('span', class_='tit').find('span')
+ for profile in soup.find_all("a"):
+ name_tag = profile.find("span", class_="tit").find("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- profile_url = profile['href'][:65] + '1' + profile['href'][66:]
+ profile_url = profile["href"][:65] + "1" + profile["href"][66:]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('span', class_='name', string='정당')
+ party_info = profile_soup.find("span", class_="name", string="정당")
if party_info and (party_span := party_info.parent) is not None:
party = party_span.text[2:].strip()
@@ -343,11 +378,13 @@ def scrap_37(url='https://council.geumjeong.go.kr/index.geumj?menuCd=DOM_0000007
return ScrapResult(
council_id="busan-geumjeonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_38(url='https://www.bsgangseo.go.kr/council/contents.do?mId=0203000000') -> ScrapResult:
+def scrap_38(
+ url="https://www.bsgangseo.go.kr/council/contents.do?mId=0203000000",
+) -> ScrapResult:
"""부산시 강서구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -356,14 +393,16 @@ def scrap_38(url='https://www.bsgangseo.go.kr/council/contents.do?mId=0203000000
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile_img in soup.find_all('button', class_='btn_close'):
- profile = profile_img.find_next('dl')
+ for profile_img in soup.find_all("button", class_="btn_close"):
+ profile = profile_img.find_next("dl")
- name_tag = profile.find('dd', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ name_tag = profile.find("dd", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find('span', class_='bold', string='정당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", class_="bold", string="정당 : ")
if party_info:
party = party_info.parent.get_text(strip=True)[5:].strip()
@@ -372,11 +411,13 @@ def scrap_38(url='https://www.bsgangseo.go.kr/council/contents.do?mId=0203000000
return ScrapResult(
council_id="busan-gangseogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_39(url='https://www.yeonje.go.kr/council/assemblyIntro/list.do?mId=0201000000') -> ScrapResult:
+def scrap_39(
+ url="https://www.yeonje.go.kr/council/assemblyIntro/list.do?mId=0201000000",
+) -> ScrapResult:
"""부산시 연제구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -385,11 +426,11 @@ def scrap_39(url='https://www.yeonje.go.kr/council/assemblyIntro/list.do?mId=020
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('dl', class_='info'):
- name_tag = profile.find('span')
+ for profile in soup.find_all("dl", class_="info"):
+ name_tag = profile.find("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당정보없음'
+ party = "정당정보없음"
# TODO
@@ -398,11 +439,13 @@ def scrap_39(url='https://www.yeonje.go.kr/council/assemblyIntro/list.do?mId=020
return ScrapResult(
council_id="busan-yeonjegu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_40(url='https://www.suyeong.go.kr/council/index.suyeong?menuCd=DOM_000001402001001000&link=success&cpath=%2Fcouncil') -> ScrapResult:
+def scrap_40(
+ url="https://www.suyeong.go.kr/council/index.suyeong?menuCd=DOM_000001402001001000&link=success&cpath=%2Fcouncil",
+) -> ScrapResult:
"""부산시 수영구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -411,12 +454,12 @@ def scrap_40(url='https://www.suyeong.go.kr/council/index.suyeong?menuCd=DOM_000
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='mem_info'):
- name_tag = profile.find('span', class_='name').find('span')
+ for profile in soup.find_all("div", class_="mem_info"):
+ name_tag = profile.find("span", class_="name").find("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('span', string='소속정당 :')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", string="소속정당 :")
if party_info:
party = party_info.parent.get_text(strip=True)[6:].strip()
@@ -425,11 +468,13 @@ def scrap_40(url='https://www.suyeong.go.kr/council/index.suyeong?menuCd=DOM_000
return ScrapResult(
council_id="busan-suyeonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_41(url='https://www.sasang.go.kr/council/index.sasang?menuCd=DOM_000000202005000000') -> ScrapResult:
+def scrap_41(
+ url="https://www.sasang.go.kr/council/index.sasang?menuCd=DOM_000000202005000000",
+) -> ScrapResult:
"""부산시 사상구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -438,24 +483,28 @@ def scrap_41(url='https://www.sasang.go.kr/council/index.sasang?menuCd=DOM_00000
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for district in soup.find_all('ul', class_='council_list'):
- for profile in district.find_all('li'):
- name_tag = profile.find('span', class_='tit')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find('span', class_='con')
+ for district in soup.find_all("ul", class_="council_list"):
+ for profile in district.find_all("li"):
+ name_tag = profile.find("span", class_="tit")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
+ party_info = profile.find("span", class_="con")
if party_info:
- party = party_info.get_text(strip=True).split(']')[0].strip()[1:]
+ party = party_info.get_text(strip=True).split("]")[0].strip()[1:]
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="busan-sasanggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_41())
\ No newline at end of file
+if __name__ == "__main__":
+ print(scrap_41())
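
A hedged sketch for exercising every scraper in this file at once (scrap_26 through scrap_41 are the functions defined above; the council sites must be reachable, so treat this as a smoke test rather than a unit test):

from scrap.local_councils import busan

for n in range(26, 42):
    result = getattr(busan, f"scrap_{n}")()
    print(result.council_id, len(result.councilors))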
diff --git a/scrap/local_councils/daegu.py b/scrap/local_councils/daegu.py
index a11baac..f565e11 100644
--- a/scrap/local_councils/daegu.py
+++ b/scrap/local_councils/daegu.py
@@ -4,21 +4,25 @@
from scrap.utils.requests import get_soup
-def scrap_42(url='https://junggucouncil.daegu.kr/source/main03/main01.html?d_th=8') -> ScrapResult:
+def scrap_42(
+ url="https://junggucouncil.daegu.kr/source/main03/main01.html?d_th=8",
+) -> ScrapResult:
"""대전시 중구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
:return: ScrapResult object holding the councilors' name and party data
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('li', class_='name')
- name = name_tag.get_text(strip=True).split()[1].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("li", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[1].strip() if name_tag else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = name_tag.find_next('li').find_next('li')
+ party = "정당 정보 없음"
+ party_info = name_tag.find_next("li").find_next("li")
if party_info:
party = party_info.get_text(strip=True).split()[-1].strip()
@@ -27,11 +31,13 @@ def scrap_42(url='https://junggucouncil.daegu.kr/source/main03/main01.html?d_th=
return ScrapResult(
council_id="daejeon-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_43(url='https://www.donggucl.daegu.kr/content/member/member.html') -> ScrapResult:
+def scrap_43(
+ url="https://www.donggucl.daegu.kr/content/member/member.html",
+) -> ScrapResult:
"""대전시 동구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -39,22 +45,26 @@ def scrap_43(url='https://www.donggucl.daegu.kr/content/member/member.html') ->
"""
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
-
- # Extract base_url for scraping profile links
+
+ # Extract base_url for scraping profile links
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for name_tag in soup.find_all('dd', class_='name'):
- name = name_tag.get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ for name_tag in soup.find_all("dd", class_="name"):
+ name = (
+ name_tag.get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
+ party = "정당 정보 없음"
- profile_link = name_tag.find_next('a', class_='abtn_profile')
+ profile_link = name_tag.find_next("a", class_="abtn_profile")
if profile_link:
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('th', scope='row', string='소속정당')
- if party_info and (party_span := party_info.find_next('td')) is not None:
+ party_info = profile_soup.find("th", scope="row", string="소속정당")
+ if party_info and (party_span := party_info.find_next("td")) is not None:
party = party_span.get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
@@ -62,11 +72,11 @@ def scrap_43(url='https://www.donggucl.daegu.kr/content/member/member.html') ->
return ScrapResult(
council_id="daejeon-donggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_44(url='https://www.dgscouncil.go.kr/kr/member/active') -> ScrapResult:
+def scrap_44(url="https://www.dgscouncil.go.kr/kr/member/active") -> ScrapResult:
"""대전시 서구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -75,12 +85,16 @@ def scrap_44(url='https://www.dgscouncil.go.kr/kr/member/active') -> ScrapResult
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('dl', class_='profile'):
- name_tag = profile.find('strong', class_='name')
- name = name_tag.get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("dl", class_="profile"):
+ name_tag = profile.find("strong", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find('li').find_next('li').find_next('li')
+ party = "정당 정보 없음"
+ party_info = profile.find("li").find_next("li").find_next("li")
if party_info:
party = party_info.get_text(strip=True).split()[-1].strip()
@@ -89,11 +103,13 @@ def scrap_44(url='https://www.dgscouncil.go.kr/kr/member/active') -> ScrapResult
return ScrapResult(
council_id="daejeon-seogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_45(url='https://nam.daegu.kr/council/index.do?menu_id=00000548') -> ScrapResult:
+def scrap_45(
+ url="https://nam.daegu.kr/council/index.do?menu_id=00000548",
+) -> ScrapResult:
"""대전시 남구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -102,12 +118,14 @@ def scrap_45(url='https://nam.daegu.kr/council/index.do?menu_id=00000548') -> Sc
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('span', class_='name2')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("span", class_="name2")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('span', class_='name', string='소속정당').find_next('span', class_='name3')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", class_="name", string="소속정당").find_next(
+ "span", class_="name3"
+ )
if party_info:
party = party_info.get_text(strip=True)
@@ -116,11 +134,11 @@ def scrap_45(url='https://nam.daegu.kr/council/index.do?menu_id=00000548') -> Sc
return ScrapResult(
council_id="daejeon-namgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_46(url='https://bukgucouncil.daegu.kr/kr/member/name.do') -> ScrapResult:
+def scrap_46(url="https://bukgucouncil.daegu.kr/kr/member/name.do") -> ScrapResult:
"""대전시 북구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -129,12 +147,14 @@ def scrap_46(url='https://bukgucouncil.daegu.kr/kr/member/name.do') -> ScrapResu
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('em', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당 : ').find_next('span')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당 : ").find_next("span")
if party_info:
party = party_info.get_text(strip=True)
@@ -143,11 +163,13 @@ def scrap_46(url='https://bukgucouncil.daegu.kr/kr/member/name.do') -> ScrapResu
return ScrapResult(
council_id="daejeon-bukgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_47(url='https://suseongcouncil.suseong.kr/ss_council/content/?pos=active&me_code=2010') -> ScrapResult:
+def scrap_47(
+ url="https://suseongcouncil.suseong.kr/ss_council/content/?pos=active&me_code=2010",
+) -> ScrapResult:
"""대전시 수성구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -156,12 +178,12 @@ def scrap_47(url='https://suseongcouncil.suseong.kr/ss_council/content/?pos=acti
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='item'):
- name_tag = profile.find('p', class_='name').find('span')
+ for profile in soup.find_all("div", class_="item"):
+ name_tag = profile.find("p", class_="name").find("span")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find_all('li')[2].find('span')
+ party = "정당 정보 없음"
+ party_info = profile.find_all("li")[2].find("span")
if party_info:
party = party_info.get_text(strip=True)
@@ -170,11 +192,13 @@ def scrap_47(url='https://suseongcouncil.suseong.kr/ss_council/content/?pos=acti
return ScrapResult(
council_id="daejeon-suseonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_48(url='https://www.dalseocouncil.daegu.kr/content/member/member.html') -> ScrapResult:
+def scrap_48(
+ url="https://www.dalseocouncil.daegu.kr/content/member/member.html",
+) -> ScrapResult:
"""대전시 달서구 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -183,11 +207,15 @@ def scrap_48(url='https://www.dalseocouncil.daegu.kr/content/member/member.html'
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for name_tag in soup.find_all('dd', class_='name'):
- name = name_tag.get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
+ for name_tag in soup.find_all("dd", class_="name"):
+ name = (
+ name_tag.get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = name_tag.find_next('span', string='소속정당').parent
+ party = "정당 정보 없음"
+ party_info = name_tag.find_next("span", string="소속정당").parent
if party_info:
party = party_info.get_text(strip=True).split()[-1].strip()
@@ -196,11 +224,13 @@ def scrap_48(url='https://www.dalseocouncil.daegu.kr/content/member/member.html'
return ScrapResult(
council_id="daejeon-dalseogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_49(url='https://council.dalseong.go.kr/content/member/member.html') -> ScrapResult:
+def scrap_49(
+ url="https://council.dalseong.go.kr/content/member/member.html",
+) -> ScrapResult:
"""대전시 달성군 페이지에서 의원 상세약력 스크랩
:param url: URL of the councilor list page
@@ -213,27 +243,35 @@ def scrap_49(url='https://council.dalseong.go.kr/content/member/member.html') ->
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for name_tag in soup.find_all('dd', class_='name'):
- name = name_tag.get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ for name_tag in soup.find_all("dd", class_="name"):
+ name = (
+ name_tag.get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
+ party = "정당 정보 없음"
- profile_link = name_tag.find_next('a', class_='abtn1')
+ profile_link = name_tag.find_next("a", class_="abtn1")
if profile_link:
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('span', class_='item', string='소속정당')
- if party_info and (party_span := party_info.find_next('span', class_='item_content')) is not None:
+ party_info = profile_soup.find("span", class_="item", string="소속정당")
+ if (
+ party_info
+ and (party_span := party_info.find_next("span", class_="item_content"))
+ is not None
+ ):
party = party_span.get_text(strip=True)
-
+
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="daejeon-dalseonggun",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_49())
\ No newline at end of file
+if __name__ == "__main__":
+ print(scrap_49())
diff --git a/scrap/local_councils/daejeon.py b/scrap/local_councils/daejeon.py
index 14a484f..091a130 100644
--- a/scrap/local_councils/daejeon.py
+++ b/scrap/local_councils/daejeon.py
@@ -5,12 +5,13 @@
from scrap.utils.requests import get_soup
import re
-def scrap_65(url = 'https://council.donggu.go.kr/kr/member/active') -> ScrapResult:
-    '''Scrape councilor details from the Daejeon Dong-gu council page
+
+def scrap_65(url="https://council.donggu.go.kr/kr/member/active") -> ScrapResult:
+    """Scrape councilor details from the Daejeon Dong-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
@@ -18,20 +19,23 @@ def scrap_65(url = 'https://council.donggu.go.kr/kr/member/active') -> ScrapResu
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in soup.find_all('dl', class_='profile'):
+ for profile in soup.find_all("dl", class_="profile"):
name_tag = profile.find("strong", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
        # Get the "view profile" link
- profile_link = profile.find('a', class_='start')
+ profile_link = profile.find("a", class_="start")
if profile_link:
- data_uid = profile_link.get('data-uid')
+ data_uid = profile_link.get("data-uid")
if data_uid:
- profile_url = base_url + f'/kr/member/profile_popup?uid={data_uid}'
+ profile_url = base_url + f"/kr/member/profile_popup?uid={data_uid}"
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('strong', string='정 당')
- if party_info and (party_span := party_info.find_next('span')) is not None:
+ party_info = profile_soup.find("strong", string="정 당")
+ if (
+ party_info
+ and (party_span := party_info.find_next("span")) is not None
+ ):
party = party_span.text
councilors.append(Councilor(name=name, party=party))
@@ -39,19 +43,20 @@ def scrap_65(url = 'https://council.donggu.go.kr/kr/member/active') -> ScrapResu
return ScrapResult(
council_id="daejeon-donggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_66(url = 'https://council.djjunggu.go.kr/kr/member/name.do') -> ScrapResult:
-    '''Scrape councilor details from the Daejeon Jung-gu council page
+
+def scrap_66(url="https://council.djjunggu.go.kr/kr/member/name.do") -> ScrapResult:
+    """Scrape councilor details from the Daejeon Jung-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
+ for profile in soup.find_all("div", class_="profile"):
name_tag = profile.find("div", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
@@ -64,24 +69,29 @@ def scrap_66(url = 'https://council.djjunggu.go.kr/kr/member/name.do') -> ScrapR
return ScrapResult(
council_id="daejeon-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_67(url = 'https://www.seogucouncil.daejeon.kr/svc/mbr/MbrPresent.do') -> ScrapResult:
-    '''Scrape councilor details from the Daejeon Seo-gu council page
+
+def scrap_67(
+    url="https://www.seogucouncil.daejeon.kr/svc/mbr/MbrPresent.do",
+) -> ScrapResult:
+    """Scrape councilor details from the Daejeon Seo-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('dl'):
+ for profile in soup.find_all("dl"):
name_tag = profile.find("dd", class_="name")
- name = name_tag.get_text(strip=True).replace(" 의원", "") if name_tag else "이름 정보 없음"
+ name = (
+ name_tag.get_text(strip=True).replace(" 의원", "") if name_tag else "이름 정보 없음"
+ )
party = "정당 정보 없음"
- party_info = list(filter(lambda x: '정당' in str(x), profile.find_all("dd")))
+ party_info = list(filter(lambda x: "정당" in str(x), profile.find_all("dd")))
if party_info:
party = party_info[0].get_text(strip=True).replace("정당: ", "")
@@ -90,25 +100,26 @@ def scrap_67(url = 'https://www.seogucouncil.daejeon.kr/svc/mbr/MbrPresent.do')
return ScrapResult(
council_id="daejeon-seogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_68(url = 'https://yuseonggucouncil.go.kr/page/page02_01_01.php') -> ScrapResult:
-    '''Scrape councilor details from the Daejeon Yuseong-gu council page
+
+def scrap_68(url="https://yuseonggucouncil.go.kr/page/page02_01_01.php") -> ScrapResult:
+    """Scrape councilor details from the Daejeon Yuseong-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
+ for profile in soup.find_all("div", class_="profile"):
name_tag = profile.find("em", class_="name")
        # Remove the hanja in parentheses (e.g. 김영희(金英姬) -> 김영희)
- name = name_tag.get_text(strip=True).split('(')[0] if name_tag else "이름 정보 없음"
+ name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음"
party = "정당 정보 없음"
- regex_pattern = re.compile(r'정\s*당\s*:', re.IGNORECASE) # Case-insensitive
+ regex_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE) # Case-insensitive
party_info = profile.find("em", string=regex_pattern)
if party_info:
party = party_info.find_next("span").get_text(strip=True)
@@ -117,24 +128,25 @@ def scrap_68(url = 'https://yuseonggucouncil.go.kr/page/page02_01_01.php') -> Sc
return ScrapResult(
council_id="daejeon-yuseonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_69(url = 'https://council.daedeok.go.kr/kr/member/name.do') -> ScrapResult:
-    '''Scrape councilor details from the Daejeon Daedeok-gu council page
+
+def scrap_69(url="https://council.daedeok.go.kr/kr/member/name.do") -> ScrapResult:
+    """Scrape councilor details from the Daejeon Daedeok-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
+ for profile in soup.find_all("div", class_="profile"):
name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
party = "정당 정보 없음"
- regex_pattern = re.compile(r'정\s*당\s*:', re.IGNORECASE) # Case-insensitive
+ regex_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE) # Case-insensitive
party_info = profile.find("em", string=regex_pattern)
if party_info:
party = party_info.find_next("span").get_text(strip=True)
@@ -143,8 +155,9 @@ def scrap_69(url = 'https://council.daedeok.go.kr/kr/member/name.do') -> ScrapRe
return ScrapResult(
council_id="daejeon-daedeokgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_69())
\ No newline at end of file
+
+if __name__ == "__main__":
+ print(scrap_69())
diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py
index b34f872..a162b4a 100644
--- a/scrap/local_councils/gwangju.py
+++ b/scrap/local_councils/gwangju.py
@@ -2,4 +2,4 @@
"""
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
-from scrap.local_councils.basic import *
\ No newline at end of file
+from scrap.local_councils.basic import *
diff --git a/scrap/local_councils/gyeonggi.py b/scrap/local_councils/gyeonggi.py
index 7fc2627..8d22ab0 100644
--- a/scrap/local_councils/gyeonggi.py
+++ b/scrap/local_councils/gyeonggi.py
@@ -4,66 +4,88 @@
from scrap.utils.requests import get_soup
from scrap.local_councils.basic import *
+
def get_profiles_88(soup, element, class_, memberlistelement, memberlistclass_):
    # Fetch councilor profiles from the councilor list page
if memberlistelement is not None:
try:
soup = soup.find_all(memberlistelement, id=memberlistclass_)[0]
except Exception:
- raise RuntimeError('[basic.py] 의원 목록 사이트에서 의원 프로필을 가져오는데 실패했습니다.')
+ raise RuntimeError("[basic.py] 의원 목록 사이트에서 의원 프로필을 가져오는데 실패했습니다.")
return soup.find_all(element, class_)
+
def get_party_88(profile, element, class_, wrapper_element, wrapper_class_, url):
    # Get the councilor's party name from their profile
if wrapper_element is not None:
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        # Get the "view profile" link
- profile_link = find(profile, wrapper_element, class_=wrapper_class_).find('a')
- profile_url = base_url + profile_link['href']
- profile = get_soup(profile_url, verify=False, encoding='euc-kr')
- party_pulp_list = list(filter(lambda x: regex_pattern.search(str(x)), find_all(profile, element, class_)))
- if party_pulp_list == []: raise RuntimeError('[basic.py] 정당정보 regex 실패')
+ profile_link = find(profile, wrapper_element, class_=wrapper_class_).find("a")
+ profile_url = base_url + profile_link["href"]
+ profile = get_soup(profile_url, verify=False, encoding="euc-kr")
+ party_pulp_list = list(
+ filter(
+ lambda x: regex_pattern.search(str(x)), find_all(profile, element, class_)
+ )
+ )
+ if party_pulp_list == []:
+ raise RuntimeError("[basic.py] 정당정보 regex 실패")
party_pulp = party_pulp_list[0]
- party_string = party_pulp.get_text(strip=True).split(' ')[-1]
+ party_string = party_pulp.get_text(strip=True).split(" ")[-1]
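+    # Walk forward through sibling <span>s until extract_party recognizes a party name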
while True:
if (party := extract_party(party_string)) is not None:
return party
- if (party_pulp := party_pulp.find_next('span')) is not None:
- party_string = party_pulp.text.strip().split(' ')[-1]
+ if (party_pulp := party_pulp.find_next("span")) is not None:
+ party_string = party_pulp.text.strip().split(" ")[-1]
else:
return "[basic.py] 정당 정보 파싱 불가"
+
def scrap_88(url, args: ScrapBasicArgument) -> ScrapResult:
-    '''Scrape councilor details
+    """Scrape councilor details
    :param url: URL of the councilor list page
    :param args: ScrapBasicArgument object
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
cid = 88
- encoding = 'euc-kr'
+ encoding = "euc-kr"
soup = get_soup(url, verify=False, encoding=encoding)
councilors: list[Councilor] = []
party_in_main_page = any(keyword in soup.text for keyword in party_keywords)
- profiles = get_profiles_88(soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls)
-    print(cid, '번째 의회에는,', len(profiles), '명의 의원이 있습니다.') # For debugging.
+    profiles = get_profiles_88(
+        soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
+    )
+    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # For debugging.
for profile in profiles:
- name = get_name(profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls)
- party = ''
+ name = get_name(
+ profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls
+ )
+ party = ""
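+        # Try the detail-page parser first and fall back to get_party_easy if it raises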
try:
- party = get_party_88(profile, args.pty_elt, args.pty_cls, args.pty_wrapelt, args.pty_wrapcls, url)
+ party = get_party_88(
+ profile,
+ args.pty_elt,
+ args.pty_cls,
+ args.pty_wrapelt,
+ args.pty_wrapcls,
+ url,
+ )
except Exception:
- party = get_party_easy(profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url)
+ party = get_party_easy(
+ profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id=str(cid),
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
+
def get_party_103(profile, element, class_, wrapper_element, wrapper_class_, url):
    # Get the councilor's party name from their profile
if wrapper_element is not None:
@@ -71,41 +93,53 @@ def get_party_103(profile, element, class_, wrapper_element, wrapper_class_, url
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        # Get the "view profile" link
profile_link = profile.find(wrapper_element, class_=wrapper_class_)
- profile_url = base_url + '/member/' + profile_link['href']
+ profile_url = base_url + "/member/" + profile_link["href"]
profile = get_soup(profile_url, verify=False)
- party_pulp_list = list(filter(lambda x: regex_pattern.search(str(x)), find_all(profile, element, class_)))
- if party_pulp_list == []: raise RuntimeError('[basic.py] 정당정보 regex 실패')
+ party_pulp_list = list(
+ filter(
+ lambda x: regex_pattern.search(str(x)), find_all(profile, element, class_)
+ )
+ )
+ if party_pulp_list == []:
+ raise RuntimeError("[basic.py] 정당정보 regex 실패")
party_pulp = party_pulp_list[0]
- party_string = party_pulp.get_text(strip=True).split(' ')[-1]
+ party_string = party_pulp.get_text(strip=True).split(" ")[-1]
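+    # Walk forward through sibling <span>s until extract_party recognizes a party name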
while True:
if (party := extract_party(party_string)) is not None:
return party
- if (party_pulp := party_pulp.find_next('span')) is not None:
- party_string = party_pulp.text.strip().split(' ')[-1]
+ if (party_pulp := party_pulp.find_next("span")) is not None:
+ party_string = party_pulp.text.strip().split(" ")[-1]
else:
return "[basic.py] 정당 정보 파싱 불가"
+
def scrap_103(url, args: ScrapBasicArgument) -> ScrapResult:
-    '''Scrape councilor details
+    """Scrape councilor details
    :param url: URL of the councilor list page
    :param args: ScrapBasicArgument object
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
cid = 103
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
party_in_main_page = any(keyword in soup.text for keyword in party_keywords)
- profiles = get_profiles_88(soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls)
-    print(cid, '번째 의회에는,', len(profiles), '명의 의원이 있습니다.') # For debugging.
+    profiles = get_profiles_88(
+        soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
+    )
+    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # For debugging.
for profile in profiles:
- name = get_name(profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls)
- party = get_party_103(profile, args.pty_elt, args.pty_cls, args.pty_wrapelt, args.pty_wrapcls, url)
+ name = get_name(
+ profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls
+ )
+ party = get_party_103(
+ profile, args.pty_elt, args.pty_cls, args.pty_wrapelt, args.pty_wrapcls, url
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id=str(cid),
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
- )
\ No newline at end of file
+ councilors=councilors,
+ )
diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py
index 2506579..58384c3 100644
--- a/scrap/local_councils/incheon.py
+++ b/scrap/local_councils/incheon.py
@@ -4,7 +4,8 @@
from scrap.utils.requests import get_soup
from scrap.local_councils.basic import *
-def scrap_50(url='https://www.icjg.go.kr/council/cnmi0101c') -> ScrapResult:
+
+def scrap_50(url="https://www.icjg.go.kr/council/cnmi0101c") -> ScrapResult:
"""인천시 중구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -13,58 +14,61 @@ def scrap_50(url='https://www.icjg.go.kr/council/cnmi0101c') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for name_tag in soup.find_all('p', class_='name'):
- name_tag_str = name_tag.get_text(strip=True).split('[')
+ for name_tag in soup.find_all("p", class_="name"):
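+        # Each name entry appears to read "이름 [정당]"; the party is the bracketed tail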
+ name_tag_str = name_tag.get_text(strip=True).split("[")
name = name_tag_str[0].strip()
party = name_tag_str[-1][:-1].strip()
-
+
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="incheon-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_51(url='https://council.icdonggu.go.kr/korean/member/active') -> ScrapResult:
+def scrap_51(url="https://council.icdonggu.go.kr/korean/member/active") -> ScrapResult:
"""인천시 동구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- raise Exception('현재 인천시 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다')
+ raise Exception("현재 인천시 동구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다")
# soup = get_soup(url, verify=False)
# councilors: list[Councilor] = []
-    # # Extract base_url to scrape profile links
- # parsed_url = urlparse(url)
- # base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- # for name_tag in soup.find_all('strong', class_='name'):
- # name = name_tag.get_text(strip=True)
- # party = '정당 정보 없음'
-
- # profile_link = name_tag.find_next('a', class_='abtn1')
- # if profile_link:
- # profile_url = base_url + profile_link['onclick'][13:104]
- # profile_soup = get_soup(profile_url, verify=False)
-
- # party_info = profile_soup.find('span', class_='subject', string='소속정당')
- # if party_info and (party_span := party_info.find_next('span', class_='detail')) is not None:
- # party = party_span.get_text(strip=True)
-
- # councilors.append(Councilor(name=name, party=party))
+# # Extract base_url to scrape profile links
+# parsed_url = urlparse(url)
+# base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- # return ScrapResult(
- # council_id="incheon-donggu",
- # council_type=CouncilType.LOCAL_COUNCIL,
- # councilors=councilors
- # )
+# for name_tag in soup.find_all('strong', class_='name'):
+# name = name_tag.get_text(strip=True)
+# party = '정당 정보 없음'
+
+# profile_link = name_tag.find_next('a', class_='abtn1')
+# if profile_link:
+# profile_url = base_url + profile_link['onclick'][13:104]
+# profile_soup = get_soup(profile_url, verify=False)
+
+# party_info = profile_soup.find('span', class_='subject', string='소속정당')
+# if party_info and (party_span := party_info.find_next('span', class_='detail')) is not None:
+# party = party_span.get_text(strip=True)
+# councilors.append(Councilor(name=name, party=party))
-def scrap_52(url='https://www.michuhol.go.kr/council/introduction/career.asp') -> ScrapResult:
+# return ScrapResult(
+# council_id="incheon-donggu",
+# council_type=CouncilType.LOCAL_COUNCIL,
+# councilors=councilors
+# )
+
+
+def scrap_52(
+ url="https://www.michuhol.go.kr/council/introduction/career.asp",
+) -> ScrapResult:
"""인천시 미추홀구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -72,19 +76,23 @@ def scrap_52(url='https://www.michuhol.go.kr/council/introduction/career.asp') -
"""
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
-
- script = soup.find('div', class_='contents_header').find_next('script').get_text(strip=True)
- # TODO
+ script = (
+ soup.find("div", class_="contents_header")
+ .find_next("script")
+ .get_text(strip=True)
+ )
+
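+    # The councilor data appears to be embedded in the inline <script> above and still needs parsing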
+ # TODO
return ScrapResult(
council_id="incheon-michuholgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_53(url='https://council.yeonsu.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_53(url="https://council.yeonsu.go.kr/kr/member/name.do") -> ScrapResult:
"""인천시 연수구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -93,25 +101,27 @@ def scrap_53(url='https://council.yeonsu.go.kr/kr/member/name.do') -> ScrapResul
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('strong')
- name = name_tag.get_text(strip=True) if name_tag else '이름 정보 없음'
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당').find_next('span').find_next('span')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("strong")
+ name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
+
+ party = "정당 정보 없음"
+ party_info = (
+ profile.find("em", string="소속정당").find_next("span").find_next("span")
+ )
if party_info:
party = party_info.get_text(strip=True)
-
+
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="incheon-yeonsugu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_54(url='https://council.namdong.go.kr/kr/member/active.do') -> ScrapResult:
+def scrap_54(url="https://council.namdong.go.kr/kr/member/active.do") -> ScrapResult:
"""인천시 남동구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -120,31 +130,31 @@ def scrap_54(url='https://council.namdong.go.kr/kr/member/active.do') -> ScrapRe
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('em', class_='name')
- name = name_tag.get_text(strip=True) if name_tag else '이름 정보 없음'
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='정 당 : ').find_next('span')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
+ name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
+
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="정 당 : ").find_next("span")
if party_info:
party = party_info.get_text(strip=True)
-
+
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="incheon-namdonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_55(url='https://council.icbp.go.kr/kr/member/active') -> ScrapResult:
+def scrap_55(url="https://council.icbp.go.kr/kr/member/active") -> ScrapResult:
"""인천시 부평구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- raise Exception('현재 인천시 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다')
+ raise Exception("현재 인천시 부평구의회 사이트는 SSLV3_ALERT_HANDSHAKE_FAILURE 에러가 발생합니다")
# soup = get_soup(url, verify=False)
# councilors: list[Councilor] = []
@@ -152,12 +162,12 @@ def scrap_55(url='https://council.icbp.go.kr/kr/member/active') -> ScrapResult:
# for profile in soup.find_all('div', class_='profile'):
# name_tag = profile.find('strong', class_='name')
# name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else '이름 정보 없음'
-
+
# party = '정당 정보 없음'
# party_info = profile.find('strong', string='소속정당').find_next('span')
# if party_info:
# party = party_info.get_text(strip=True).split()[-1].strip()
-
+
# councilors.append(Councilor(name=name, party=party))
# return ScrapResult(
@@ -167,7 +177,9 @@ def scrap_55(url='https://council.icbp.go.kr/kr/member/active') -> ScrapResult:
# )
-def scrap_56(url='https://www.gyeyang.go.kr/open_content/council/member/present/present.jsp') -> ScrapResult:
+def scrap_56(
+ url="https://www.gyeyang.go.kr/open_content/council/member/present/present.jsp",
+) -> ScrapResult:
"""인천시 계양구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -176,22 +188,25 @@ def scrap_56(url='https://www.gyeyang.go.kr/open_content/council/member/present/
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for name_tag in soup.find_all('li', class_='name'):
- name = name_tag.get_text(strip=True) if name_tag else '이름 정보 없음'
-
- party = '정당 정보 없음'
- party_info = name_tag.find_next('li').find_next('li').find('span', class_='span_sfont')
+ for name_tag in soup.find_all("li", class_="name"):
+ name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
+
+ party = "정당 정보 없음"
+ party_info = (
+ name_tag.find_next("li").find_next("li").find("span", class_="span_sfont")
+ )
if party_info:
party = party_info.get_text(strip=True)
-
+
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="incheon-gyeyanggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
+
def scrap_57(url, args) -> ScrapResult:
"""인천시 서구 페이지에서 의원 상세약력 스크랩
@@ -202,23 +217,28 @@ def scrap_57(url, args) -> ScrapResult:
councilors: list[Councilor] = []
cid = 57
- profiles = get_profiles(soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls)
-    print(cid, '번째 의회에는,', len(profiles), '명의 의원이 있습니다.') # For debugging.
+    profiles = get_profiles(
+        soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
+    )
+    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.") # For debugging.
for profile in profiles:
- name = get_name(profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls)
+ name = get_name(
+ profile, args.name_elt, args.name_cls, args.name_wrapelt, args.name_wrapcls
+ )
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
party_pulp = find(profile, args.pty_elt, class_=args.pty_cls)
- if party_pulp is None: raise AssertionError('[incheon.py] 정당정보 실패')
+ if party_pulp is None:
+ raise AssertionError("[incheon.py] 정당정보 실패")
party_string = party_pulp.get_text(strip=True)
- party_string = party_string.split(' ')[-1].strip()
+ party_string = party_string.split(" ")[-1].strip()
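+        # Step through following <span>s until extract_party recognizes a party name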
while True:
party = extract_party(party_string)
if party is not None:
break
- if (party_pulp := party_pulp.find_next('span')) is not None:
- party_string = party_pulp.text.split(' ')[-1]
+ if (party_pulp := party_pulp.find_next("span")) is not None:
+ party_string = party_pulp.text.split(" ")[-1]
else:
raise RuntimeError("[incheon.py] 정당 정보 파싱 불가")
@@ -227,8 +247,9 @@ def scrap_57(url, args) -> ScrapResult:
return ScrapResult(
council_id=str(cid),
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_56())
\ No newline at end of file
+
+if __name__ == "__main__":
+ print(scrap_56())
diff --git a/scrap/local_councils/seoul.py b/scrap/local_councils/seoul.py
index 82050e4..af74bd7 100644
--- a/scrap/local_councils/seoul.py
+++ b/scrap/local_councils/seoul.py
@@ -4,39 +4,41 @@
from scrap.utils.requests import get_soup
-def scrap_1(url = 'https://bookcouncil.jongno.go.kr/record/recordView.do?key=99784f935fce5c1d7c8c08c2f9e35dda1c0a6128428ecb1a87f87ee2b4e82890ffcf12563e01473f') -> ScrapResult:
-    '''Scrape councilor details from the Seoul Jongno-gu council page
+def scrap_1(
+    url="https://bookcouncil.jongno.go.kr/record/recordView.do?key=99784f935fce5c1d7c8c08c2f9e35dda1c0a6128428ecb1a87f87ee2b4e82890ffcf12563e01473f",
+) -> ScrapResult:
+    """Scrape councilor details from the Seoul Jongno-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
-
- for profile in soup.find_all('div', class_='pop_profile'):
+
+ for profile in soup.find_all("div", class_="pop_profile"):
info = profile.find("div", class_="info")
data_ul = info.find("ul", class_="detail")
data_lis = data_ul.find_all("li")
name = data_lis[0].find("span").get_text(strip=True)
party = data_lis[2].find("span").get_text(strip=True)
name = name if name else "이름 정보 없음"
- party = party if party else '정당 정보 없음'
+ party = party if party else "정당 정보 없음"
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-jongno",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_2(url = 'https://02jgnew.council.or.kr/kr/member/active') -> ScrapResult:
-    '''Scrape councilor details from the Seoul Jung-gu council page
+def scrap_2(url="https://02jgnew.council.or.kr/kr/member/active") -> ScrapResult:
+    """Scrape councilor details from the Seoul Jung-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
parliment_soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
@@ -44,19 +46,19 @@ def scrap_2(url = 'https://02jgnew.council.or.kr/kr/member/active') -> ScrapResu
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in parliment_soup.find_all('div', class_='profile'):
+ for profile in parliment_soup.find_all("div", class_="profile"):
name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
        # Get the "view profile" link
- profile_link = profile.find('a', class_='start')
+ profile_link = profile.find("a", class_="start")
if profile_link:
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('em', string='소속정당 : ')
- if party_info and (party_span := party_info.find_next('span')) is not None:
+ party_info = profile_soup.find("em", string="소속정당 : ")
+ if party_info and (party_span := party_info.find_next("span")) is not None:
party = party_span.text
councilors.append(Councilor(name=name, party=party))
@@ -64,7 +66,7 @@ def scrap_2(url = 'https://02jgnew.council.or.kr/kr/member/active') -> ScrapResu
return ScrapResult(
council_id="seoul-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -82,7 +84,7 @@ def scrap_3(url="https://www.yscl.go.kr/kr/member/name.do") -> ScrapResult:
name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
party_info = profile.find("em", string="소속정당")
if party_info:
party = party_info.find_next("span").get_text(strip=True)
@@ -92,11 +94,11 @@ def scrap_3(url="https://www.yscl.go.kr/kr/member/name.do") -> ScrapResult:
return ScrapResult(
council_id="seoul-yongsangu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_4(url='https://sdcouncil.sd.go.kr/kr/member/active2') -> ScrapResult:
+def scrap_4(url="https://sdcouncil.sd.go.kr/kr/member/active2") -> ScrapResult:
"""서울시 성동구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -106,11 +108,11 @@ def scrap_4(url='https://sdcouncil.sd.go.kr/kr/member/active2') -> ScrapResult:
councilors: list[Councilor] = []
for profile in soup.find_all("dl", class_="profile"):
- name_tag = profile.find('strong', class_='name')
+ name_tag = profile.find("strong", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find("strong", string='정 당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("strong", string="정 당 : ")
if party_info:
party = party_info.find_next("span").get_text(strip=True)
@@ -119,11 +121,11 @@ def scrap_4(url='https://sdcouncil.sd.go.kr/kr/member/active2') -> ScrapResult:
return ScrapResult(
council_id="seoul-seongdonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_5(url='https://council.gwangjin.go.kr/kr/member/active') -> ScrapResult:
+def scrap_5(url="https://council.gwangjin.go.kr/kr/member/active") -> ScrapResult:
"""서울시 광진구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -132,51 +134,57 @@ def scrap_5(url='https://council.gwangjin.go.kr/kr/member/active') -> ScrapResul
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_=lambda x: x in ('profile', 'profile_none')):
- name_tag = profile.find('strong')
+ for profile in soup.find_all(
+ "div", class_=lambda x: x in ("profile", "profile_none")
+ ):
+ name_tag = profile.find("strong")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find("em", string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gwangjingu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_6(url='http://council.ddm.go.kr/citizen/menu1.asp') -> ScrapResult:
+def scrap_6(url="http://council.ddm.go.kr/citizen/menu1.asp") -> ScrapResult:
"""서울시 동대문구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- parliment_soup = get_soup(url, verify=False, encoding='euc-kr')
+ parliment_soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
    # Extract base_url to scrape profile links
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in parliment_soup.find_all('div', class_='intro_text tm_lg_6'):
- name = profile.find('p', class_='intro_text_title').string.strip().split(' ')[0]
- party = '정당 정보 없음'
+ for profile in parliment_soup.find_all("div", class_="intro_text tm_lg_6"):
+ name = profile.find("p", class_="intro_text_title").string.strip().split(" ")[0]
+ party = "정당 정보 없음"
        # Get the "view profile" link
- profile_link = profile.find('a')
+ profile_link = profile.find("a")
if profile_link:
- profile_url = base_url + '/assemblyman/greeting/menu02.asp?assembly_id=' + profile_link['href'][1:]
- profile_soup = get_soup(profile_url, verify=False, encoding='euc-kr')
-
- profile_info = profile_soup.find('div', class_='profileTxt')
+ profile_url = (
+ base_url
+ + "/assemblyman/greeting/menu02.asp?assembly_id="
+ + profile_link["href"][1:]
+ )
+ profile_soup = get_soup(profile_url, verify=False, encoding="euc-kr")
+
+ profile_info = profile_soup.find("div", class_="profileTxt")
if profile_info:
- profile_string = profile_info.get_text().strip().split('\xa0')
- idx = profile_string.index('소속정당')
+ profile_string = profile_info.get_text().strip().split("\xa0")
+ idx = profile_string.index("소속정당")
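+                # The party name sits two tokens after the "소속정당" label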
party = profile_string[idx + 2]
councilors.append(Councilor(name=name, party=party))
@@ -184,11 +192,11 @@ def scrap_6(url='http://council.ddm.go.kr/citizen/menu1.asp') -> ScrapResult:
return ScrapResult(
council_id="seoul-dongdaemungu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_7(url='https://council.jungnang.go.kr/kr/member/name2.do') -> ScrapResult:
+def scrap_7(url="https://council.jungnang.go.kr/kr/member/name2.do") -> ScrapResult:
"""서울시 중랑구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -197,25 +205,25 @@ def scrap_7(url='https://council.jungnang.go.kr/kr/member/name2.do') -> ScrapRes
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find("em", string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-jungnanggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_8(url='https://www.sbc.go.kr/kr/member/active.do') -> ScrapResult:
+def scrap_8(url="https://www.sbc.go.kr/kr/member/active.do") -> ScrapResult:
"""서울시 성북구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -224,25 +232,27 @@ def scrap_8(url='https://www.sbc.go.kr/kr/member/active.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find("em", string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").get_text(strip=True).split(' ')[-1].strip()
+ party = (
+ party_info.find_next("span").get_text(strip=True).split(" ")[-1].strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-seongbukgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_9(url='https://council.gangbuk.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_9(url="https://council.gangbuk.go.kr/kr/member/name.do") -> ScrapResult:
"""서울시 강북구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -251,25 +261,31 @@ def scrap_9(url='https://council.gangbuk.go.kr/kr/member/name.do') -> ScrapResul
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('div', class_='name')
- name = name_tag.find_next('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("div", class_="name")
+ name = (
+ name_tag.find_next("strong").get_text(strip=True)
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gangbukgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_10(url='https://www.council-dobong.seoul.kr/kr/member/active.do') -> ScrapResult:
+def scrap_10(
+ url="https://www.council-dobong.seoul.kr/kr/member/active.do",
+) -> ScrapResult:
"""서울시 도봉구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -278,25 +294,27 @@ def scrap_10(url='https://www.council-dobong.seoul.kr/kr/member/active.do') -> S
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").get_text(strip=True).split(' ')[-1].strip()
+ party = (
+ party_info.find_next("span").get_text(strip=True).split(" ")[-1].strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-dobonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_11(url='https://council.nowon.kr/kr/member/active.do') -> ScrapResult:
+def scrap_11(url="https://council.nowon.kr/kr/member/active.do") -> ScrapResult:
"""서울시 노원구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -305,25 +323,27 @@ def scrap_11(url='https://council.nowon.kr/kr/member/active.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next("span").get_text(strip=True).split(' ')[-1].strip()
+ party = (
+ party_info.find_next("span").get_text(strip=True).split(" ")[-1].strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-nowongu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_12(url='https://council.ep.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_12(url="https://council.ep.go.kr/kr/member/name.do") -> ScrapResult:
"""서울시 은평구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -332,52 +352,65 @@ def scrap_12(url='https://council.ep.go.kr/kr/member/name.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all("div", class_='profile'):
- name_tag = profile.find('div', class_='name')
- name = name_tag.find_next('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("div", class_="name")
+ name = (
+ name_tag.find_next("strong").get_text(strip=True)
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next('span').find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-eunpyeonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_13(url='https://www.sdmcouncil.go.kr/source/korean/square/ascending.html') -> ScrapResult:
+def scrap_13(
+ url="https://www.sdmcouncil.go.kr/source/korean/square/ascending.html",
+) -> ScrapResult:
"""서울시 서대문구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for profile in soup.find_all('dl', class_='card_desc'):
- name_tag = profile.find_next('dt')
+ for profile in soup.find_all("dl", class_="card_desc"):
+ name_tag = profile.find_next("dt")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('ul')
+ party = "정당 정보 없음"
+ party_info = profile.find("ul")
if party_info:
- party = party_info.find_next('li').find_next('li').find_next('li').get_text(strip=True).split(' ')[-1].strip()
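+            # The party is in the third <li> of the card; keep only the last word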
+ party = (
+ party_info.find_next("li")
+ .find_next("li")
+ .find_next("li")
+ .get_text(strip=True)
+ .split(" ")[-1]
+ .strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-seodaemungu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_14(url='https://council.mapo.seoul.kr/kr/member/active.do') -> ScrapResult:
+def scrap_14(url="https://council.mapo.seoul.kr/kr/member/active.do") -> ScrapResult:
"""서울시 마포구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -386,25 +419,25 @@ def scrap_14(url='https://council.mapo.seoul.kr/kr/member/active.do') -> ScrapRe
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='wrap'):
- name_tag = profile.find_next('div', class_='right')
- name = name_tag.find_next('h4').get_text(strip=True) if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="wrap"):
+ name_tag = profile.find_next("div", class_="right")
+ name = name_tag.find_next("h4").get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('span', class_='tit', string='소속정당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("span", class_="tit", string="소속정당 : ")
if party_info:
- party = party_info.find_next('span', class_='con').get_text(strip=True)
+ party = party_info.find_next("span", class_="con").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-mapogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_15(url='https://www.ycc.go.kr/kr/member/active') -> ScrapResult:
+def scrap_15(url="https://www.ycc.go.kr/kr/member/active") -> ScrapResult:
"""서울시 양천구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -417,19 +450,23 @@ def scrap_15(url='https://www.ycc.go.kr/kr/member/active') -> ScrapResult:
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('div', class_='name')
- name = name_tag.find_next('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("div", class_="name")
+ name = (
+ name_tag.find_next("strong").get_text(strip=True)
+ if name_tag
+ else "이름 정보 없음"
+ )
+ party = "정당 정보 없음"
        # Get the "view profile" link
- profile_uid = profile.find('a', class_='start')['data-uid']
+ profile_uid = profile.find("a", class_="start")["data-uid"]
if profile_uid:
- profile_url = base_url + '/kr/member/profile_popup?uid=' + profile_uid
+ profile_url = base_url + "/kr/member/profile_popup?uid=" + profile_uid
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('em', string='소속정당')
- if party_info and (party_span := party_info.find_next('span')):
+ party_info = profile_soup.find("em", string="소속정당")
+ if party_info and (party_span := party_info.find_next("span")):
party = party_span.get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
@@ -437,38 +474,44 @@ def scrap_15(url='https://www.ycc.go.kr/kr/member/active') -> ScrapResult:
return ScrapResult(
council_id="seoul-yangcheongu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_16(url='https://gsc.gangseo.seoul.kr/member/org.asp') -> ScrapResult:
+def scrap_16(url="https://gsc.gangseo.seoul.kr/member/org.asp") -> ScrapResult:
"""서울시 강서구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for profile in soup.find_all('ul', class_='mb-15'):
- name_tag = profile.find_next('span', class_='fs-18 fw-700')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find_next('span', class_='title').find_next('span', class_='title').find_next('span', class_='title')
+ for profile in soup.find_all("ul", class_="mb-15"):
+ name_tag = profile.find_next("span", class_="fs-18 fw-700")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
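+        # The party value follows the third <span class="title"> in the card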
+ party_info = (
+ profile.find_next("span", class_="title")
+ .find_next("span", class_="title")
+ .find_next("span", class_="title")
+ )
if party_info:
- party = party_info.find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gangseogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_17(url='https://www.guroc.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_17(url="https://www.guroc.go.kr/kr/member/name.do") -> ScrapResult:
"""서울시 구로구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -477,38 +520,46 @@ def scrap_17(url='https://www.guroc.go.kr/kr/member/name.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('div', class_='name')
- name = name_tag.find_next('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("div", class_="name")
+ name = (
+ name_tag.find_next("strong").get_text(strip=True)
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next('span').find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gurogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_18(url='https://council.geumcheon.go.kr/member/member.asp') -> ScrapResult:
+def scrap_18(url="https://council.geumcheon.go.kr/member/member.asp") -> ScrapResult:
"""서울시 금천구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: list[Councilor] = []
- for profile in soup.find_all('li', class_='name'):
- name_tag = profile.find_next('strong')
- name = name_tag.get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("li", class_="name"):
+ name_tag = profile.find_next("strong")
+ name = (
+ name_tag.get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
# TODO
councilors.append(Councilor(name=name, party=party))
@@ -516,11 +567,11 @@ def scrap_18(url='https://council.geumcheon.go.kr/member/member.asp') -> ScrapRe
return ScrapResult(
council_id="seoul-geumcheongu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_19(url='https://www.ydpc.go.kr/kr/member/active.do') -> ScrapResult:
+def scrap_19(url="https://www.ydpc.go.kr/kr/member/active.do") -> ScrapResult:
"""서울시 영등포구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -529,25 +580,25 @@ def scrap_19(url='https://www.ydpc.go.kr/kr/member/active.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당 : ")
if party_info:
- party = party_info.find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-yeongdeungpogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_20(url='http://assembly.dongjak.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_20(url="http://assembly.dongjak.go.kr/kr/member/name.do") -> ScrapResult:
"""서울시 동작구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -556,25 +607,25 @@ def scrap_20(url='http://assembly.dongjak.go.kr/kr/member/name.do') -> ScrapResu
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next('span').find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-dongjakgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_21(url='https://www.ga21c.seoul.kr/kr/member/name.do') -> ScrapResult:
+def scrap_21(url="https://www.ga21c.seoul.kr/kr/member/name.do") -> ScrapResult:
"""서울시 관악구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -583,25 +634,27 @@ def scrap_21(url='https://www.ga21c.seoul.kr/kr/member/name.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next('span').get_text(strip=True).split(' ')[-1].strip()
+ party = (
+ party_info.find_next("span").get_text(strip=True).split(" ")[-1].strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gwanakgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_22(url='https://www.sdc.seoul.kr/kr/member/active.do') -> ScrapResult:
+def scrap_22(url="https://www.sdc.seoul.kr/kr/member/active.do") -> ScrapResult:
"""서울시 서초구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -610,25 +663,27 @@ def scrap_22(url='https://www.sdc.seoul.kr/kr/member/active.do') -> ScrapResult:
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('em', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("em", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당 : ")
if party_info:
- party = party_info.find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-seochogu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_23(url='https://www.gncouncil.go.kr/kr/member/name.do') -> ScrapResult:
+def scrap_23(url="https://www.gncouncil.go.kr/kr/member/name.do") -> ScrapResult:
"""서울시 강남구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -637,35 +692,39 @@ def scrap_23(url='https://www.gncouncil.go.kr/kr/member/name.do') -> ScrapResult
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('div', class_='name')
- name = name_tag.find_next('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
-
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("div", class_="name")
+ name = (
+ name_tag.find_next("strong").get_text(strip=True)
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당")
if party_info:
- party = party_info.find_next('span').find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gangnamgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_24(url='https://council.songpa.go.kr/kr/member/active.do') -> ScrapResult:
+def scrap_24(url="https://council.songpa.go.kr/kr/member/active.do") -> ScrapResult:
"""서울시 송파구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
# TODO
- raise Exception('송파구 의회 사이트는 현재 먹통입니다')
+ raise Exception("송파구 의회 사이트는 현재 먹통입니다")
-def scrap_25(url='https://council.gangdong.go.kr/kr/member/active.do') -> ScrapResult:
+def scrap_25(url="https://council.gangdong.go.kr/kr/member/active.do") -> ScrapResult:
"""서울시 강동구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -674,23 +733,25 @@ def scrap_25(url='https://council.gangdong.go.kr/kr/member/active.do') -> ScrapR
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find_next('em', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find_next("em", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party = '정당 정보 없음'
- party_info = profile.find('em', string='소속정당 : ')
+ party = "정당 정보 없음"
+ party_info = profile.find("em", string="소속정당 : ")
if party_info:
- party = party_info.find_next('span').get_text(strip=True)
+ party = party_info.find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul-gangdonggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_2())
\ No newline at end of file
+if __name__ == "__main__":
+ print(scrap_2())
diff --git a/scrap/local_councils/ulsan.py b/scrap/local_councils/ulsan.py
index f2a2219..52bcf31 100644
--- a/scrap/local_councils/ulsan.py
+++ b/scrap/local_councils/ulsan.py
@@ -5,24 +5,33 @@
from scrap.utils.requests import get_soup
import re
-regex_pattern = re.compile(r'정\s*\S*\s*당', re.IGNORECASE) # Case-insensitive
+regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE) # Case-insensitive
-def scrap_70(url = 'https://council.junggu.ulsan.kr/content/member/memberName.html') -> ScrapResult:
-    '''Scrape councilor details from the Ulsan Jung-gu council page
+
+def scrap_70(
+    url="https://council.junggu.ulsan.kr/content/member/memberName.html",
+) -> ScrapResult:
+    """Scrape councilor details from the Ulsan Jung-gu council page
    :param url: URL of the councilor list page
    :return: ScrapResult object containing councilors' names and parties
-    '''
+    """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('dl'):
+ for profile in soup.find_all("dl"):
name_tag = profile.find("dd", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
-
+
party = "정당 정보 없음"
- party_info = list(filter(lambda x: regex_pattern.search(str(x)), profile.find_all("dd")))
- if party_info and (party_span := party_info[0].find_next('span').find_next('span')) is not None:
+ party_info = list(
+ filter(lambda x: regex_pattern.search(str(x)), profile.find_all("dd"))
+ )
+ if (
+ party_info
+ and (party_span := party_info[0].find_next("span").find_next("span"))
+ is not None
+ ):
party = party_span.text
councilors.append(Councilor(name=name, party=party))
@@ -30,25 +39,36 @@ def scrap_70(url = 'https://council.junggu.ulsan.kr/content/member/memberName.ht
return ScrapResult(
council_id="ulsan-junggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_71(url = 'https://www.namgucouncil.ulsan.kr/content/member/memberName.html') -> ScrapResult:
- '''울산시 남구 페이지에서 의원 상세약력 스크랩
+
+def scrap_71(
+ url="https://www.namgucouncil.ulsan.kr/content/member/memberName.html",
+) -> ScrapResult:
+ """울산시 남구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
+ """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('dl'):
+ for profile in soup.find_all("dl"):
name_tag = profile.find("dd", class_="name")
- name = name_tag.get_text(strip=True).replace(" 의원", "") if name_tag else "이름 정보 없음"
+ name = (
+ name_tag.get_text(strip=True).replace(" 의원", "") if name_tag else "이름 정보 없음"
+ )
party = "정당 정보 없음"
- party_info = list(filter(lambda x: regex_pattern.search(str(x)), profile.find_all("dd")))
- if party_info and (party_span := party_info[0].find_next('span').find_next('span')) is not None:
+ party_info = list(
+ filter(lambda x: regex_pattern.search(str(x)), profile.find_all("dd"))
+ )
+ if (
+ party_info
+ and (party_span := party_info[0].find_next("span").find_next("span"))
+ is not None
+ ):
party = party_span.text
councilors.append(Councilor(name=name, party=party))
@@ -56,65 +76,74 @@ def scrap_71(url = 'https://www.namgucouncil.ulsan.kr/content/member/memberName.
return ScrapResult(
council_id="ulsan-namgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_72(url = 'https://www.donggu-council.ulsan.kr/source/korean/member/active.html') -> ScrapResult:
- '''울산시 동구 페이지에서 의원 상세약력 스크랩
+
+def scrap_72(
+ url="https://www.donggu-council.ulsan.kr/source/korean/member/active.html",
+) -> ScrapResult:
+ """울산시 동구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
- soup = get_soup(url, verify=False, encoding='euc-kr')
+ """
+ soup = get_soup(url, verify=False, encoding="euc-kr")
councilors: List[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
+ for profile in soup.find_all("div", class_="profile"):
name_tag = profile.find("li", class_="name")
# () 안에 있는 한자를 제거 (ex. 김영희(金英姬) -> 김영희)
- name = name_tag.get_text(strip=True).split('(')[0] if name_tag else "이름 정보 없음"
+ name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음"
party = "정당 정보 없음"
- party_info = list(filter(lambda x: regex_pattern.search(str(x)), profile.find_all("li")))
+ party_info = list(
+ filter(lambda x: regex_pattern.search(str(x)), profile.find_all("li"))
+ )
if party_info:
- party = party_info[0].get_text(strip=True).split(': ')[1]
+ party = party_info[0].get_text(strip=True).split(": ")[1]
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="ulsan-donggu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_73(url = 'https://council.bukgu.ulsan.kr/kr/member/active.do') -> ScrapResult:
- '''울산시 북구 페이지에서 의원 상세약력 스크랩
+
+def scrap_73(url="https://council.bukgu.ulsan.kr/kr/member/active.do") -> ScrapResult:
+ """울산시 북구 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
+ """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
- for profile in soup.find_all('dl', class_='profile'):
+ for profile in soup.find_all("dl", class_="profile"):
name_tag = profile.find("strong", class_="name")
# () 안에 있는 한자를 제거 (ex. 김영희(金英姬) -> 김영희)
- name = name_tag.get_text(strip=True).split('(')[0] if name_tag else "이름 정보 없음"
+ name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음"
party = "정당 정보 없음"
- party_info = list(filter(lambda x: regex_pattern.search(str(x)), profile.find_all("li")))
+ party_info = list(
+ filter(lambda x: regex_pattern.search(str(x)), profile.find_all("li"))
+ )
if party_info:
- party = party_info[0].get_text(strip=True).split(': ')[1]
+ party = party_info[0].get_text(strip=True).split(": ")[1]
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="ulsan-bukgu",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_74(url = 'https://assembly.ulju.ulsan.kr/kr/member/active') -> ScrapResult:
- '''울산시 울주군 페이지에서 의원 상세약력 스크랩
+
+def scrap_74(url="https://assembly.ulju.ulsan.kr/kr/member/active") -> ScrapResult:
+ """울산시 울주군 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
+ """
soup = get_soup(url, verify=False)
councilors: List[Councilor] = []
@@ -122,18 +151,18 @@ def scrap_74(url = 'https://assembly.ulju.ulsan.kr/kr/member/active') -> ScrapRe
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in soup.find_all('div', class_='profile'):
+ for profile in soup.find_all("div", class_="profile"):
name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
# 프로필보기 링크 가져오기
- profile_link = profile.find('a', class_='start')
+ profile_link = profile.find("a", class_="start")
if profile_link:
- profile_url = base_url + profile_link['href']
+ profile_url = base_url + profile_link["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('em', string=regex_pattern)
- if party_info and (party_span := party_info.find_next('span')) is not None:
+ party_info = profile_soup.find("em", string=regex_pattern)
+ if party_info and (party_span := party_info.find_next("span")) is not None:
party = party_span.text
councilors.append(Councilor(name=name, party=party))
@@ -141,8 +170,9 @@ def scrap_74(url = 'https://assembly.ulju.ulsan.kr/kr/member/active') -> ScrapRe
return ScrapResult(
council_id="ulsan_uljugun",
council_type=CouncilType.LOCAL_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-if __name__ == '__main__':
- print(scrap_70())
\ No newline at end of file
+
+if __name__ == "__main__":
+ print(scrap_70())
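
The regex_pattern at the top of this file, 정\s*\S*\s*당, is what lets the Ulsan scrapers find the party row under label variants: it matches the literal "정당" (political party), spaced renderings such as "정 당", and longer labels like "소속정당" that contain the pair. A quick standalone check (sample strings invented for illustration):

    import re

    regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE)

    for s in ["소속정당 : 국민의힘", "정  당", "의원명"]:
        print(s, "->", bool(regex_pattern.search(s)))
    # 소속정당 : 국민의힘 -> True
    # 정  당 -> True
    # 의원명 -> False

One consistency nit visible above: scrap_74 registers council_id="ulsan_uljugun" with an underscore, while its siblings use hyphens (ulsan-junggu, ulsan-namgu, ulsan-donggu, ulsan-bukgu).
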
diff --git a/scrap/metropolitan_council.py b/scrap/metropolitan_council.py
index 1c19078..1c31cdf 100644
--- a/scrap/metropolitan_council.py
+++ b/scrap/metropolitan_council.py
@@ -4,12 +4,14 @@
from scrap.utils.requests import get_soup
-def scrap_metro_1(url = 'https://www.smc.seoul.kr/main/memIntro01.do?menuId=001002001001') -> ScrapResult:
- '''서울시 페이지에서 의원 상세약력 스크랩
+def scrap_metro_1(
+ url="https://www.smc.seoul.kr/main/memIntro01.do?menuId=001002001001",
+) -> ScrapResult:
+ """서울시 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
+ """
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
@@ -17,54 +19,60 @@ def scrap_metro_1(url = 'https://www.smc.seoul.kr/main/memIntro01.do?menuId=0010
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
- for profile in soup.find_all('input', class_='memLinkk'):
- name = profile['value'].strip() if profile else '이름 정보 없음'
- party = '정당 정보 없음'
+ for profile in soup.find_all("input", class_="memLinkk"):
+ name = profile["value"].strip() if profile else "이름 정보 없음"
+ party = "정당 정보 없음"
# 프로필보기 링크 가져오기
- profile_url = base_url + '/home/' + profile['data-url']
+ profile_url = base_url + "/home/" + profile["data-url"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('div', class_='profile')
- if party_info and (party_span := party_info.find('li')) is not None:
- party = party_span.find_next('li').get_text(strip=True)
+ party_info = profile_soup.find("div", class_="profile")
+ if party_info and (party_span := party_info.find("li")) is not None:
+ party = party_span.find_next("li").get_text(strip=True)
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="seoul",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_2(url = 'https://council.busan.go.kr/council/past02') -> ScrapResult:
- '''부산시 페이지에서 의원 상세약력 스크랩
+def scrap_metro_2(url="https://council.busan.go.kr/council/past02") -> ScrapResult:
+ """부산시 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
- soup = get_soup(url, verify=False).find('ul', class_='inmemList')
+ """
+ soup = get_soup(url, verify=False).find("ul", class_="inmemList")
councilors: list[Councilor] = []
- for profile in soup.find_all('a', class_='detail'):
- name = profile.get_text(strip=True) if profile else '이름 정보 없음'
- party = '정당 정보 없음'
+ for profile in soup.find_all("a", class_="detail"):
+ name = profile.get_text(strip=True) if profile else "이름 정보 없음"
+ party = "정당 정보 없음"
# 프로필보기 링크 가져오기
- profile_url = profile['href']
+ profile_url = profile["href"]
profile_soup = get_soup(profile_url, verify=False)
- party_info = profile_soup.find('ul', class_='vs-list-st-type01')
- if party_info and (party_span := party_info.find('li')) is not None:
- party = party_span.find_next('li').find_next('li').get_text(strip=True).split()[-1].strip()
+ party_info = profile_soup.find("ul", class_="vs-list-st-type01")
+ if party_info and (party_span := party_info.find("li")) is not None:
+ party = (
+ party_span.find_next("li")
+ .find_next("li")
+ .get_text(strip=True)
+ .split()[-1]
+ .strip()
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="busan",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -82,7 +90,7 @@ def scrap_metro_3(url="https://council.daegu.go.kr/kr/member/active") -> ScrapRe
name_tag = profile.find("p", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party = '정당 정보 없음'
+ party = "정당 정보 없음"
party_info = profile.find("em", string="소속정당")
if party_info:
party = party_info.find_next("span").get_text(strip=True)
@@ -92,7 +100,7 @@ def scrap_metro_3(url="https://council.daegu.go.kr/kr/member/active") -> ScrapRe
return ScrapResult(
council_id="daegu",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -103,11 +111,11 @@ def scrap_metro_4(url="https://www.icouncil.go.kr/main/member/name.jsp") -> Scra
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('table', class_='data').find('tbody')
+ soup = get_soup(url, verify=False).find("table", class_="data").find("tbody")
councilors: list[Councilor] = []
for profile in soup.find_all("tr"):
- columns = profile.find_all('td')
+ columns = profile.find_all("td")
name_tag = columns[0]
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
@@ -120,7 +128,7 @@ def scrap_metro_4(url="https://www.icouncil.go.kr/main/member/name.jsp") -> Scra
return ScrapResult(
council_id="incheon",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -131,7 +139,7 @@ def scrap_metro_5(url="https://council.gwangju.go.kr/index.do?PID=029") -> Scrap
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('table', class_='data').find('tbody')
+ soup = get_soup(url, verify=False).find("table", class_="data").find("tbody")
councilors: list[Councilor] = []
# TODO
@@ -139,37 +147,41 @@ def scrap_metro_5(url="https://council.gwangju.go.kr/index.do?PID=029") -> Scrap
return ScrapResult(
council_id="gwangju",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_6(url="https://council.daejeon.go.kr/svc/cmp/MbrListByPhoto.do") -> ScrapResult:
+def scrap_metro_6(
+ url="https://council.daejeon.go.kr/svc/cmp/MbrListByPhoto.do",
+) -> ScrapResult:
"""대전시 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('ul', class_='mlist')
+ soup = get_soup(url, verify=False).find("ul", class_="mlist")
councilors: list[Councilor] = []
for profile in soup.find_all("dl"):
- name_tag = profile.find('dd', class_='name')
- name = name_tag.find('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
+ name_tag = profile.find("dd", class_="name")
+ name = name_tag.find("strong").get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = name_tag.find_next('dd').find_next('dd')
- party = party_tag.find('i').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = name_tag.find_next("dd").find_next("dd")
+ party = party_tag.find("i").get_text(strip=True) if party_tag else "정당 정보 없음"
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="daejeon",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_7(url="https://www.council.ulsan.kr/kor/councillor/viewByPerson.do") -> ScrapResult:
+def scrap_metro_7(
+ url="https://www.council.ulsan.kr/kor/councillor/viewByPerson.do",
+) -> ScrapResult:
"""울산시 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -179,10 +191,10 @@ def scrap_metro_7(url="https://www.council.ulsan.kr/kor/councillor/viewByPerson.
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for name_tag in soup.find_all("div", class_='name'):
+ for name_tag in soup.find_all("div", class_="name"):
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = name_tag.find_next('li').find_next('li')
+ party_tag = name_tag.find_next("li").find_next("li")
party = party_tag.get_text(strip=True) if party_tag else "정당 정보 없음"
councilors.append(Councilor(name=name, party=party))
@@ -190,57 +202,71 @@ def scrap_metro_7(url="https://www.council.ulsan.kr/kor/councillor/viewByPerson.
return ScrapResult(
council_id="ulsan",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_8(url="https://council.sejong.go.kr/mnu/pom/introductionMemberByName.do") -> ScrapResult:
+def scrap_metro_8(
+ url="https://council.sejong.go.kr/mnu/pom/introductionMemberByName.do",
+) -> ScrapResult:
"""세종시 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('ul', class_='ml')
+ soup = get_soup(url, verify=False).find("ul", class_="ml")
councilors: list[Councilor] = []
- for profile in soup.find_all('dl'):
- name_tag = profile.find('dd', class_='name')
- name = name_tag.find(string=True, recursive=False).strip() if name_tag else "이름 정보 없음"
-
- party_tag = name_tag.find_next('dd').find_next('dd')
- party = party_tag.get_text(strip=True).split()[-1].strip() if party_tag else "정당 정보 없음"
+ for profile in soup.find_all("dl"):
+ name_tag = profile.find("dd", class_="name")
+ name = (
+ name_tag.find(string=True, recursive=False).strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party_tag = name_tag.find_next("dd").find_next("dd")
+ party = (
+ party_tag.get_text(strip=True).split()[-1].strip()
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="sejong",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_9(url="https://www.ggc.go.kr/site/main/memberInfo/actvMmbr/list?cp=1&menu=consonant&sortOrder=MI_NAME&sortDirection=ASC") -> ScrapResult:
+def scrap_metro_9(
+ url="https://www.ggc.go.kr/site/main/memberInfo/actvMmbr/list?cp=1&menu=consonant&sortOrder=MI_NAME&sortDirection=ASC",
+) -> ScrapResult:
"""경기도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
:return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
"""
- soup = get_soup(url, verify=False).find('div', class_='paging2 clearfix')
+ soup = get_soup(url, verify=False).find("div", class_="paging2 clearfix")
councilors: list[Councilor] = []
-
+
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
-
- for page in soup.find_all('a'):
- page_url = base_url + page['href']
- page_soup = get_soup(page_url, verify=False).find('ul', class_='memberList3 clear')
- for profile in page_soup.find_all('li', recursive=False):
- name_tag = profile.find('p', class_='f22 blue3')
+
+ for page in soup.find_all("a"):
+ page_url = base_url + page["href"]
+ page_soup = get_soup(page_url, verify=False).find(
+ "ul", class_="memberList3 clear"
+ )
+ for profile in page_soup.find_all("li", recursive=False):
+ name_tag = profile.find("p", class_="f22 blue3")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = profile.find('li', class_='f15 m0')
+ party_tag = profile.find("li", class_="f15 m0")
party = party_tag.get_text(strip=True) if party_tag else "정당 정보 없음"
councilors.append(Councilor(name=name, party=party))
@@ -248,11 +274,13 @@ def scrap_metro_9(url="https://www.ggc.go.kr/site/main/memberInfo/actvMmbr/list?
return ScrapResult(
council_id="gyeonggi",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_10(url="https://council.chungbuk.kr/kr/member/active.do") -> ScrapResult:
+def scrap_metro_10(
+ url="https://council.chungbuk.kr/kr/member/active.do",
+) -> ScrapResult:
"""충청북도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -262,23 +290,31 @@ def scrap_metro_10(url="https://council.chungbuk.kr/kr/member/active.do") -> Scr
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('em', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party_tag = profile.find('em', string='소속정당')
- party = party_tag.find_next('span').find_next('span').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = profile.find("em", string="소속정당")
+ party = (
+ party_tag.find_next("span").find_next("span").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="chungbuk",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_11(url="https://council.chungnam.go.kr/kr/member/name.do") -> ScrapResult:
+def scrap_metro_11(
+ url="https://council.chungnam.go.kr/kr/member/name.do",
+) -> ScrapResult:
"""충청남도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -288,23 +324,31 @@ def scrap_metro_11(url="https://council.chungnam.go.kr/kr/member/name.do") -> Sc
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('em', class_='name')
- name = name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
+ name = (
+ name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음"
+ )
- party_tag = profile.find('em', string='소속정당 : ')
- party = party_tag.find_next('span').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = profile.find("em", string="소속정당 : ")
+ party = (
+ party_tag.find_next("span").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="chungnam",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_12(url="https://www.assem.jeonbuk.kr/board/list.do?boardId=2018_assemblyman&searchType=assem_check&keyword=1&menuCd=DOM_000000103001000000&contentsSid=453") -> ScrapResult:
+def scrap_metro_12(
+ url="https://www.assem.jeonbuk.kr/board/list.do?boardId=2018_assemblyman&searchType=assem_check&keyword=1&menuCd=DOM_000000103001000000&contentsSid=453",
+) -> ScrapResult:
"""전라북도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -314,23 +358,29 @@ def scrap_metro_12(url="https://www.assem.jeonbuk.kr/board/list.do?boardId=2018_
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('li', class_='career'):
- name_tag = profile.find('tr', class_='name')
+ for profile in soup.find_all("li", class_="career"):
+ name_tag = profile.find("tr", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = profile.find('tr', class_='list1')
- party = party_tag.find('td', class_='co2').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = profile.find("tr", class_="list1")
+ party = (
+ party_tag.find("td", class_="co2").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="jeonbuk",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_13(url="https://www.jnassembly.go.kr/profileHistory.es?mid=a10202010000&cs_daesoo=12") -> ScrapResult:
+def scrap_metro_13(
+ url="https://www.jnassembly.go.kr/profileHistory.es?mid=a10202010000&cs_daesoo=12",
+) -> ScrapResult:
"""전라남도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -340,19 +390,23 @@ def scrap_metro_13(url="https://www.jnassembly.go.kr/profileHistory.es?mid=a1020
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('tbody'):
- name_tag = profile.find('p')
+ for profile in soup.find_all("tbody"):
+ name_tag = profile.find("p")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = profile.find('th', string='소속정당')
- party = party_tag.find_next('td', class_='txt_left').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = profile.find("th", string="소속정당")
+ party = (
+ party_tag.find_next("td", class_="txt_left").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="jeonnam",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -366,23 +420,29 @@ def scrap_metro_14(url="https://council.gb.go.kr/kr/member/name") -> ScrapResult
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('div', class_='name')
- name = name_tag.find('strong').get_text(strip=True) if name_tag else "이름 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("div", class_="name")
+ name = name_tag.find("strong").get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = profile.find('em', string='소속정당')
- party = party_tag.find_next('span').find_next('span').get_text(strip=True) if party_tag else "정당 정보 없음"
+ party_tag = profile.find("em", string="소속정당")
+ party = (
+ party_tag.find_next("span").find_next("span").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="gyeongbuk",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_15(url="https://council.gyeongnam.go.kr/kr/member/active.do") -> ScrapResult:
+def scrap_metro_15(
+ url="https://council.gyeongnam.go.kr/kr/member/active.do",
+) -> ScrapResult:
"""경상남도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -392,19 +452,27 @@ def scrap_metro_15(url="https://council.gyeongnam.go.kr/kr/member/active.do") ->
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('div', class_='name')
- name = name_tag.find('strong').get_text(strip=True).split('(')[0].strip() if name_tag else "이름 정보 없음"
-
- party_tag = profile.find('em', class_='ls2', string='정당')
- party = party_tag.find_next('span').get_text(strip=True) if party_tag else "정당 정보 없음"
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("div", class_="name")
+ name = (
+ name_tag.find("strong").get_text(strip=True).split("(")[0].strip()
+ if name_tag
+ else "이름 정보 없음"
+ )
+
+ party_tag = profile.find("em", class_="ls2", string="정당")
+ party = (
+ party_tag.find_next("span").get_text(strip=True)
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="gyeongnam",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
@@ -418,23 +486,29 @@ def scrap_metro_16(url="https://council.gangwon.kr/kr/member/name.do") -> ScrapR
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for profile in soup.find_all('div', class_='profile'):
- name_tag = profile.find('em', class_='name')
+ for profile in soup.find_all("div", class_="profile"):
+ name_tag = profile.find("em", class_="name")
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
- party_tag = profile.find('em', string='소속정당')
- party = party_tag.find_next('span').get_text(strip=True).split()[-1].strip() if party_tag else "정당 정보 없음"
+ party_tag = profile.find("em", string="소속정당")
+ party = (
+ party_tag.find_next("span").get_text(strip=True).split()[-1].strip()
+ if party_tag
+ else "정당 정보 없음"
+ )
councilors.append(Councilor(name=name, party=party))
return ScrapResult(
council_id="gangwon",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-def scrap_metro_17(url="https://www.council.jeju.kr/cmember/active/name.do") -> ScrapResult:
+def scrap_metro_17(
+ url="https://www.council.jeju.kr/cmember/active/name.do",
+) -> ScrapResult:
"""제주도 페이지에서 의원 상세약력 스크랩
:param url: 의원 목록 사이트 url
@@ -444,7 +518,7 @@ def scrap_metro_17(url="https://www.council.jeju.kr/cmember/active/name.do") ->
soup = get_soup(url, verify=False)
councilors: list[Councilor] = []
- for tag in soup.find_all('p', class_='name'):
+ for tag in soup.find_all("p", class_="name"):
text = tag.get_text(strip=True).split("(")
# print(text)
name = text[0].strip()
@@ -455,10 +529,9 @@ def scrap_metro_17(url="https://www.council.jeju.kr/cmember/active/name.do") ->
return ScrapResult(
council_id="jeju",
council_type=CouncilType.METROPOLITAN_COUNCIL,
- councilors=councilors
+ councilors=councilors,
)
-
-if __name__ == '__main__':
- print(scrap_metro_17())
\ No newline at end of file
+if __name__ == "__main__":
+ print(scrap_metro_17())
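
Several scrapers in this file (scrap_metro_1 and scrap_metro_9 here, scrap_74 in the Ulsan file) share a two-step shape: read profile links off a listing page, rebuild absolute URLs from the scheme and netloc, then fetch each profile page for the party field. The URL-joining step reduces to the sketch below; the href is made up for illustration:

    from urllib.parse import urlparse, urljoin

    url = "https://www.smc.seoul.kr/main/memIntro01.do?menuId=001002001001"
    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    href = "/home/memberProfile.do?id=42"  # hypothetical relative link
    print(base_url + href)     # https://www.smc.seoul.kr/home/memberProfile.do?id=42
    print(urljoin(url, href))  # same result here

urljoin would be the slightly more robust choice if a council ever emits hrefs without a leading slash; the manual concatenation is fine as long as the sites serve root-relative links.
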
diff --git a/scrap/national_council.py b/scrap/national_council.py
index b058abf..6c4656e 100644
--- a/scrap/national_council.py
+++ b/scrap/national_council.py
@@ -8,42 +8,43 @@
def scrap_national_council(cd: int) -> ScrapResult:
- '''열린국회정보 Open API를 이용해 역대 국회의원 인적사항 스크랩
- _data 폴더에 assembly_api_key.json 파일을 만들어야 하며,
- 해당 JSON은 {"key":"(Open API에서 발급받은 인증키)"} 꼴을 가져야 한다.
- https://open.assembly.go.kr/portal/data/service/selectAPIServicePage.do/OBL7NF0011935G18076#none
-
- :param cd: 국회의원 대수. 제21대 국회의원을 스크랩하고자 하면 21
- :return: 국회의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
- '''
-
- key_json_path = os.path.join(BASE_DIR, '_data', 'assembly_api_key.json')
- if not os.path.exists(key_json_path):
- raise Exception('열린국회정보 Open API에 회원가입 후 인증키를 발급받아주세요.\nhttps://open.assembly.go.kr/portal/openapi/openApiDevPage.do')
- with open(key_json_path, 'r') as key_json:
- assembly_key = json.load(key_json)['key']
-
- request_url = f"https://open.assembly.go.kr/portal/openapi/nwvrqwxyaytdsfvhu?KEY={assembly_key}&pSize=500&UNIT_CD={cd + 100000}"
- response = requests.get(request_url)
-
- if response.status_code != 200:
- raise Exception(f'Open API 요청에 실패했습니다 (상태 코드 {response.status_code})')
-
- root = ET.fromstring(response.text)
- councilors: list[Councilor] = []
-
- for row in root.iter('row'):
- councilors.append(Councilor(
- name=row.find('HG_NM').text,
- party=row.find('POLY_NM').text
- ))
-
- return ScrapResult(
- council_id='national',
- council_type=CouncilType.NATIONAL_COUNCIL,
- councilors=councilors
- )
-
-
-if __name__ == '__main__':
- print(scrap_national_council(21))
\ No newline at end of file
+ """열린국회정보 Open API를 이용해 역대 국회의원 인적사항 스크랩
+ _data 폴더에 assembly_api_key.json 파일을 만들어야 하며,
+ 해당 JSON은 {"key":"(Open API에서 발급받은 인증키)"} 꼴을 가져야 한다.
+ https://open.assembly.go.kr/portal/data/service/selectAPIServicePage.do/OBL7NF0011935G18076#none
+
+ :param cd: 국회의원 대수. 제21대 국회의원을 스크랩하고자 하면 21
+ :return: 국회의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
+ """
+
+ key_json_path = os.path.join(BASE_DIR, "_data", "assembly_api_key.json")
+ if not os.path.exists(key_json_path):
+ raise Exception(
+ "열린국회정보 Open API에 회원가입 후 인증키를 발급받아주세요.\nhttps://open.assembly.go.kr/portal/openapi/openApiDevPage.do"
+ )
+ with open(key_json_path, "r") as key_json:
+ assembly_key = json.load(key_json)["key"]
+
+ request_url = f"https://open.assembly.go.kr/portal/openapi/nwvrqwxyaytdsfvhu?KEY={assembly_key}&pSize=500&UNIT_CD={cd + 100000}"
+ response = requests.get(request_url)
+
+ if response.status_code != 200:
+ raise Exception(f"Open API 요청에 실패했습니다 (상태 코드 {response.status_code})")
+
+ root = ET.fromstring(response.text)
+ councilors: list[Councilor] = []
+
+ for row in root.iter("row"):
+ councilors.append(
+ Councilor(name=row.find("HG_NM").text, party=row.find("POLY_NM").text)
+ )
+
+ return ScrapResult(
+ council_id="national",
+ council_type=CouncilType.NATIONAL_COUNCIL,
+ councilors=councilors,
+ )
+
+
+if __name__ == "__main__":
+ print(scrap_national_council(21))
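
In scrap_national_council, UNIT_CD={cd + 100000} encodes the assembly number the way the 열린국회정보 (Open National Assembly Information) API expects: for the 21st National Assembly, cd=21 yields UNIT_CD=100021. The XML handling reduces to the pattern below, run here against an inline stand-in rather than a live response (field names HG_NM/POLY_NM as in the code above):

    import xml.etree.ElementTree as ET

    xml_text = """
    <response>
      <row><HG_NM>김철수</HG_NM><POLY_NM>국민의힘</POLY_NM></row>
      <row><HG_NM>김영희</HG_NM><POLY_NM>더불어민주당</POLY_NM></row>
    </response>
    """

    root = ET.fromstring(xml_text)
    for row in root.iter("row"):
        print(row.find("HG_NM").text, row.find("POLY_NM").text)

    cd = 21
    print(f"UNIT_CD={cd + 100000}")  # UNIT_CD=100021
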
diff --git a/scrap/utils/database.py b/scrap/utils/database.py
index c197c6e..9014802 100644
--- a/scrap/utils/database.py
+++ b/scrap/utils/database.py
@@ -9,6 +9,7 @@
# 컬렉션은 하나 이상의 문서로 구성됩니다.
db = client[str(MongoDBSecrets.database_name)]
+
def save_to_database(record: ScrapResult):
"""
지방의회 크롤링 결과를 데이터베이스에 저장합니다.
@@ -25,20 +26,21 @@ def save_to_database(record: ScrapResult):
collection.find_one_and_update(
{"councilId": record.council_id},
{"$set": dataclasses.asdict(record)},
- upsert=True
+ upsert=True,
)
return True
except Exception as e:
print(e)
return False
-if __name__ == "__main__":
- test_record = (ScrapResult(
+
+if __name__ == "__main__":
+ test_record = ScrapResult(
council_id="test-test",
council_type=CouncilType.LOCAL_COUNCIL,
councilors=[
Councilor(name="김철수", party="국민의힘"),
Councilor(name="김영희", party="더불어민주당"),
- ]
- ))
- print(save_to_database(test_record))
\ No newline at end of file
+ ],
+ )
+ print(save_to_database(test_record))
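
save_to_database leans on MongoDB upsert semantics: find_one_and_update with upsert=True inserts a new document when nothing matches the councilId filter and overwrites the match otherwise, so re-running a scraper does not duplicate a council. One thing worth double-checking: the filter key is camelCase councilId while dataclasses.asdict produces snake_case council_id; MongoDB copies equality fields from the filter into an upserted document, so matching still works, but stored documents end up carrying both spellings. The stored shape itself can be inspected without a database; this sketch assumes the scrap package is importable:

    import dataclasses

    from scrap.utils.types import CouncilType, Councilor, ScrapResult

    record = ScrapResult(
        council_id="test-test",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=[Councilor(name="김철수", party="국민의힘")],
    )
    # councilors become plain dicts; council_type stays a CouncilType member
    print(dataclasses.asdict(record))
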
diff --git a/scrap/utils/requests.py b/scrap/utils/requests.py
index 2bf72b6..16a2135 100644
--- a/scrap/utils/requests.py
+++ b/scrap/utils/requests.py
@@ -8,15 +8,18 @@
from unicodedata import normalize
# SSL 인증서 검증 경고 무시
-requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) # type: ignore
+requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) # type: ignore
# 충청북도 보은군, 강진시에서 타임아웃이 발생해 제한 시간을 60초로 설정
timeout_time = 60
-def get_soup(url: str, additional_headers={}, verify=True, encoding="utf-8") -> BeautifulSoup:
+
+def get_soup(
+ url: str, additional_headers={}, verify=True, encoding="utf-8"
+) -> BeautifulSoup:
"""
url을 입력받아 BeautifulSoup 객체를 반환합니다.
requests 라이브러리를 사용합니다. 크롤링 결과가 정상적으로 나오지 않을 경우, Selenium 라이브러리를 사용할 수 있습니다.
-
+
:param url: 크롤링할 페이지의 url입니다.
:param additional_headers: 추가적으로 포함할 헤더입니다. 딕셔너리 형태로 입력받습니다.
:param verify: SSL 인증서 검증 여부입니다. 인증서가 만료된 페이지를 크롤링할 경우 False로 설정합니다.
@@ -25,11 +28,13 @@ def get_soup(url: str, additional_headers={}, verify=True, encoding="utf-8") ->
# HTTP 요청에 포함해줄 헤더
http_headers = {
- "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
http_headers.update(additional_headers)
-
- response = requests.get(url, verify=verify, headers=http_headers, timeout=timeout_time)
+
+ response = requests.get(
+ url, verify=verify, headers=http_headers, timeout=timeout_time
+ )
response.encoding = encoding
- sanitized_response = normalize('NFKC', unescape(response.text))
- return BeautifulSoup(sanitized_response, 'html.parser')
\ No newline at end of file
+ sanitized_response = normalize("NFKC", unescape(response.text))
+ return BeautifulSoup(sanitized_response, "html.parser")
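
Two details of get_soup are easy to miss. First, response.encoding is assigned before response.text is read, which is what makes the encoding="euc-kr" councils decode correctly. Second, the unescape then normalize("NFKC", ...) pipeline folds HTML entities such as &nbsp; into ordinary spaces before parsing, keeping get_text(strip=True) comparisons stable. (The additional_headers={} mutable default is harmless here because the dict is only read, but a None default would be the more idiomatic guard.) A self-contained run of the sanitizing step:

    from html import unescape
    from unicodedata import normalize

    from bs4 import BeautifulSoup

    raw = '<dd class="name">김철수&nbsp;의원</dd>'
    sanitized = normalize("NFKC", unescape(raw))  # &nbsp; -> U+00A0 -> regular space
    soup = BeautifulSoup(sanitized, "html.parser")
    print(soup.find("dd", class_="name").get_text(strip=True))  # 김철수 의원
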
diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py
index 49f4ce6..47da40a 100644
--- a/scrap/utils/spreadsheet.py
+++ b/scrap/utils/spreadsheet.py
@@ -15,119 +15,402 @@
# 변경 시 token.json 삭제 후 재인증 필요
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
+
+
def google_authorization():
- '''Google Sheets API 활용을 위한 인증 정보 요청
+ """Google Sheets API 활용을 위한 인증 정보 요청
credentials.json 파일을 토대로 인증을 요청하되, token.json 파일이 존재할 경우 거기에 저장된 정보 활용
:todo: credentials.json 파일, token.json 파일 값을 환경변수로 설정
- :return: gspread.client.Client 인스턴스'''
+ :return: gspread.client.Client 인스턴스"""
creds = None
- token_json_path = os.path.join(BASE_DIR, '_data', 'token.json')
+ token_json_path = os.path.join(BASE_DIR, "_data", "token.json")
# 이미 저장된 인증 정보가 있는지 확인
if os.path.exists(token_json_path):
creds = Credentials.from_authorized_user_file(token_json_path, SCOPES)
-
+
# 인증 정보가 없거나 비정상적인 경우 인증 재요청
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
- flow= InstalledAppFlow.from_client_secrets_file(os.path.join(BASE_DIR, '_data', 'credentials.json'), SCOPES)
+ flow = InstalledAppFlow.from_client_secrets_file(
+ os.path.join(BASE_DIR, "_data", "credentials.json"), SCOPES
+ )
creds = flow.run_local_server(port=0)
- with open(token_json_path, 'w') as token:
+ with open(token_json_path, "w") as token:
token.write(creds.to_json())
return gspread.authorize(creds)
+
def main() -> None:
# Google Sheets API 설정
client: gspread.client.Client = google_authorization()
# 스프레드시트 열기
- link = 'https://docs.google.com/spreadsheets/d/1fBDJjkw8FSN5wXrvos9Q2wDsyItkUtNFGOxUZYE-h0M/edit#gid=1127955905' # T4I-의회목록
+ link = "https://docs.google.com/spreadsheets/d/1fBDJjkw8FSN5wXrvos9Q2wDsyItkUtNFGOxUZYE-h0M/edit#gid=1127955905" # T4I-의회목록
spreadsheet: gspread.Spreadsheet = client.open_by_url(link)
- worksheet: gspread.Worksheet = spreadsheet.get_worksheet(0) # 원하는 워크시트 선택 (0은 첫 번째 워크시트입니다.)
- # TODO - 홈페이지 위 charset=euc-kr 등을 인식해 바로 가져오기.
+ worksheet: gspread.Worksheet = spreadsheet.get_worksheet(
+ 0
+ ) # 원하는 워크시트 선택 (0은 첫 번째 워크시트입니다.)
+ # TODO - 홈페이지 위 charset=euc-kr 등을 인식해 바로 가져오기.
euc_kr = [6, 13, 16, 31, 72, 88, 112, 154, 157, 163, 167, 181, 197, 202]
special_functions = list(range(1, 57)) + [57, 88, 103]
args = {
- 2 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 3 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
+ 2: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 3: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
# 인천
- 57 : ScrapBasicArgument(pf_elt='div', pf_cls='box', name_elt='p', name_cls='mem_tit2', pty_elt='p', pty_cls='mem_tit2'),
- 58 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 59 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='em'),
+ 57: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="box",
+ name_elt="p",
+ name_cls="mem_tit2",
+ pty_elt="p",
+ pty_cls="mem_tit2",
+ ),
+ 58: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 59: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="em",
+ ),
# 광주
- 60 : ScrapBasicArgument(pf_elt='div', pf_cls='content', name_elt='h5', pty_wrapelt='a', pty_elt='li'),
- 61 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
+ 60: ScrapBasicArgument(
+ pf_elt="div", pf_cls="content", name_elt="h5", pty_wrapelt="a", pty_elt="li"
+ ),
+ 61: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
# 62 : TODO! /common/selectCouncilMemberProfile.json 을 어떻게 얻을지..
# 63 : TODO! 홈페이지 터짐
# 64 : TODO! /common/selectCouncilMemberProfile.json 을 어떻게 얻을지..
# 대전
- 65 : ScrapBasicArgument(pf_elt='dl', pf_cls='profile', name_elt='strong', name_cls='name', pty_elt='strong'),
- 66 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='em'),
- 67 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='member', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 68 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 69 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
+ 65: ScrapBasicArgument(
+ pf_elt="dl",
+ pf_cls="profile",
+ name_elt="strong",
+ name_cls="name",
+ pty_elt="strong",
+ ),
+ 66: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="em",
+ ),
+ 67: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="member",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 68: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 69: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
# 울산
- 70 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='memberName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 71 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='memberName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 72 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='li', name_cls='name', pty_elt='li'),
- 73 : ScrapBasicArgument(pf_elt='dl', pf_cls='profile', name_elt='strong', name_cls='name', pty_elt='li'),
- 74 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_wrapelt='a', pty_wrapcls='start', pty_elt='li'),
+ 70: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="memberName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 71: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="memberName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 72: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="li", name_cls="name", pty_elt="li"
+ ),
+ 73: ScrapBasicArgument(
+ pf_elt="dl",
+ pf_cls="profile",
+ name_elt="strong",
+ name_cls="name",
+ pty_elt="li",
+ ),
+ 74: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="em",
+ name_cls="name",
+ pty_wrapelt="a",
+ pty_wrapcls="start",
+ pty_elt="li",
+ ),
# 경기
- 75 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='em'),
- 76 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 77 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='mbrListByName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 78 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_wrapelt='a', pty_wrapcls='end', pty_elt='li'),
- 79 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 80 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 81 : ScrapBasicArgument(pf_memlistelt='div', pf_memlistcls='member_list', pf_elt='dd', name_elt='p', pty_elt='tr'),
- 82 : ScrapBasicArgument(pf_memlistelt='div', pf_memlistcls='cts1426_box', pf_elt='div', pf_cls='conbox', name_elt='p', pty_elt='li'),
+ 75: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="em",
+ ),
+ 76: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 77: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="mbrListByName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 78: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_wrapelt="a",
+ pty_wrapcls="end",
+ pty_elt="li",
+ ),
+ 79: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 80: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 81: ScrapBasicArgument(
+ pf_memlistelt="div",
+ pf_memlistcls="member_list",
+ pf_elt="dd",
+ name_elt="p",
+ pty_elt="tr",
+ ),
+ 82: ScrapBasicArgument(
+ pf_memlistelt="div",
+ pf_memlistcls="cts1426_box",
+ pf_elt="div",
+ pf_cls="conbox",
+ name_elt="p",
+ pty_elt="li",
+ ),
# 경기 - 동두천
- 83 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_wrapelt='a', pty_wrapcls='start', pty_elt='li'),
- 84 : ScrapBasicArgument(pf_elt='div', pf_cls='law_box', name_elt='span', name_cls='name', pty_elt='p'),
- 85 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='em'),
- 86 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 87 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 88 : ScrapBasicArgument(pf_memlistelt='div', pf_memlistcls='member_list', pf_elt='dl', pf_cls='box', name_elt='span', name_cls='name', pty_wrapelt='p', pty_wrapcls='btn', pty_elt='li'),
- 89 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='memberName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='span'),
- 90 : ScrapBasicArgument(pf_elt='dl', pf_cls='profile', name_elt='strong', name_cls='name', pty_elt='li'),
- # 경기 - 화성
- 91 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='mbr0101', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 92 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='member', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 93 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_wrapelt='a', pty_wrapcls='end', pty_elt='li'),
- 94 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='mbrListByName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='dd'),
- 95 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='member', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='tr'),
- 96 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='em'),
- 97 : ScrapBasicArgument(pf_memlistelt='ul', pf_memlistcls='memberList', pf_elt='li', name_elt='strong', pty_wrapelt='a', pty_elt='tr'),
- 98 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 99 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 100 : ScrapBasicArgument(pf_elt='div', pf_cls='list', name_elt='h4', name_cls='h0', pty_elt='li'),
+ 83: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="em",
+ name_cls="name",
+ pty_wrapelt="a",
+ pty_wrapcls="start",
+ pty_elt="li",
+ ),
+ 84: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="law_box",
+ name_elt="span",
+ name_cls="name",
+ pty_elt="p",
+ ),
+ 85: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="em",
+ ),
+ 86: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 87: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 88: ScrapBasicArgument(
+ pf_memlistelt="div",
+ pf_memlistcls="member_list",
+ pf_elt="dl",
+ pf_cls="box",
+ name_elt="span",
+ name_cls="name",
+ pty_wrapelt="p",
+ pty_wrapcls="btn",
+ pty_elt="li",
+ ),
+ 89: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="memberName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="span",
+ ),
+ 90: ScrapBasicArgument(
+ pf_elt="dl",
+ pf_cls="profile",
+ name_elt="strong",
+ name_cls="name",
+ pty_elt="li",
+ ),
+ # 경기 - 화성
+ 91: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="mbr0101",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 92: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="member",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 93: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_wrapelt="a",
+ pty_wrapcls="end",
+ pty_elt="li",
+ ),
+ 94: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="mbrListByName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="dd",
+ ),
+ 95: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="member",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="tr",
+ ),
+ 96: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="em",
+ ),
+ 97: ScrapBasicArgument(
+ pf_memlistelt="ul",
+ pf_memlistcls="memberList",
+ pf_elt="li",
+ name_elt="strong",
+ pty_wrapelt="a",
+ pty_elt="tr",
+ ),
+ 98: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 99: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 100: ScrapBasicArgument(
+ pf_elt="div", pf_cls="list", name_elt="h4", name_cls="h0", pty_elt="li"
+ ),
# 경기 - 광주
- 101 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 102 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_wrapelt='a', pty_wrapcls='start', pty_elt='li'),
- 103 : ScrapBasicArgument(pf_elt='div', pf_cls='col-sm-6', name_elt='h5', name_cls='h5', pty_wrapelt='a', pty_wrapcls='d-inline-block', pty_elt='li'),
- 104 : ScrapBasicArgument(pf_elt='div', pf_cls='text_box', name_elt='h3', name_cls='h0', pty_wrapelt='a', pty_wraptxt='누리집', pty_elt='li'),
- 105 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
+ 101: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 102: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="em",
+ name_cls="name",
+ pty_wrapelt="a",
+ pty_wrapcls="start",
+ pty_elt="li",
+ ),
+ 103: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="col-sm-6",
+ name_elt="h5",
+ name_cls="h5",
+ pty_wrapelt="a",
+ pty_wrapcls="d-inline-block",
+ pty_elt="li",
+ ),
+ 104: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="text_box",
+ name_elt="h3",
+ name_cls="h0",
+ pty_wrapelt="a",
+ pty_wraptxt="누리집",
+ pty_elt="li",
+ ),
+ 105: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
# 강원
# 106 : TODO! 정당정보 없음
# TODO! 107이 get_soup에서 실패 중 - HTTPSConnectionPool(host='council.wonju.go.kr', port=443): Max retries exceeded with url: /content/member/memberName.html (Caused by SSLError(SSLError(1, '[SSL: DH_KEY_TOO_SMALL] dh key too small (_ssl.c:1007)')))
- 107 : ScrapBasicArgument(pf_memlistelt='div', pf_memlistcls='content', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='span'),
- 108 : ScrapBasicArgument(pf_elt='dl', pf_cls='profile', name_elt='strong', pty_elt='li'),
- 109 : ScrapBasicArgument(pf_memlistelt='section', pf_memlistcls='memberName', pf_elt='dl', name_elt='dd', name_cls='name', pty_elt='span'),
- 110 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- # 111 : TODO! 정당 없고 홈페이지는 깨짐
- 112 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='em', name_cls='name', pty_elt='em'),
- 113 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_cls='name', pty_elt='li'),
- 115 : ScrapBasicArgument(pf_elt='div', pf_cls='profile', name_elt='div', name_cls='name', pty_elt='li'),
+ 107: ScrapBasicArgument(
+ pf_memlistelt="div",
+ pf_memlistcls="content",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="span",
+ ),
+ 108: ScrapBasicArgument(
+ pf_elt="dl", pf_cls="profile", name_elt="strong", pty_elt="li"
+ ),
+ 109: ScrapBasicArgument(
+ pf_memlistelt="section",
+ pf_memlistcls="memberName",
+ pf_elt="dl",
+ name_elt="dd",
+ name_cls="name",
+ pty_elt="span",
+ ),
+ 110: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ # 111 : TODO! 정당 없고 홈페이지는 깨짐
+ 112: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_elt="em", name_cls="name", pty_elt="em"
+ ),
+ 113: ScrapBasicArgument(
+ pf_elt="div", pf_cls="profile", name_cls="name", pty_elt="li"
+ ),
+ 115: ScrapBasicArgument(
+ pf_elt="div",
+ pf_cls="profile",
+ name_elt="div",
+ name_cls="name",
+ pty_elt="li",
+ ),
# TODO : 정당이 주석처리되어 있어서 soup가 인식을 못함.
- 116 : ScrapBasicArgument(pf_elt='div', pf_cls='memberName', name_cls='name',pty_elt='dd'),
+ 116: ScrapBasicArgument(
+ pf_elt="div", pf_cls="memberName", name_cls="name", pty_elt="dd"
+ ),
}
# 데이터 가져오기
data: list[dict] = worksheet.get_all_records()
- result: str = ''
+ result: str = ""
error_times = 0
parse_error_times = 0
@@ -135,19 +418,27 @@ def main() -> None:
N = 226
# for n in range (113, 169):
for n in range(107, 108):
- encoding = 'euc-kr' if n in euc_kr else 'utf-8'
+ encoding = "euc-kr" if n in euc_kr else "utf-8"
try:
if n in special_functions:
function_name = f"scrap_{n}"
if hasattr(sys.modules[__name__], function_name):
function_to_call = getattr(sys.modules[__name__], function_name)
if n < 57:
- result = str(function_to_call(data[n - 1]['상세약력 링크']).councilors)
+ result = str(
+ function_to_call(data[n - 1]["상세약력 링크"]).councilors
+ )
else:
- result = str(function_to_call(data[n - 1]['상세약력 링크'], args=args[n]).councilors)
+ result = str(
+ function_to_call(
+ data[n - 1]["상세약력 링크"], args=args[n]
+ ).councilors
+ )
else:
- result = str(scrap_basic(data[n - 1]['상세약력 링크'], n, args[n], encoding).councilors)
- if '정보 없음' in result:
+ result = str(
+ scrap_basic(data[n - 1]["상세약력 링크"], n, args[n], encoding).councilors
+ )
+ if "정보 없음" in result:
print("정보 없음이 포함되어 있습니다.")
parse_error_times += 1
print(result)
@@ -158,6 +449,10 @@ def main() -> None:
print(f"오류 : [district-{n}] {str(e)}")
error_times += 1
continue # 에러가 발생하면 다음 반복으로 넘어감
- print(f"| 총 실행 횟수: {N} | 에러 횟수: {error_times} | 정보 없음 횟수: {parse_error_times} | 타임아웃 횟수: {timeouts} |")
-if __name__ == '__main__':
+ print(
+ f"| 총 실행 횟수: {N} | 에러 횟수: {error_times} | 정보 없음 횟수: {parse_error_times} | 타임아웃 횟수: {timeouts} |"
+ )
+
+
+if __name__ == "__main__":
main()
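
The driver in main mixes two dispatch paths: councils listed in special_functions resolve to a hand-written scrap_{n} via getattr on the current module, and the rest go through scrap_basic parameterized by the args table of ScrapBasicArgument selectors. The lookup mechanism in isolation (the scrap_2 here is a stub, not the real scraper):

    import sys

    def scrap_2(url):  # stand-in for the real scraper
        return f"scraped {url}"

    n = 2
    function_name = f"scrap_{n}"
    if hasattr(sys.modules[__name__], function_name):
        function_to_call = getattr(sys.modules[__name__], function_name)
        print(function_to_call("https://example.com"))  # scraped https://example.com
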
diff --git a/scrap/utils/types.py b/scrap/utils/types.py
index cf3733f..a6ed4f1 100644
--- a/scrap/utils/types.py
+++ b/scrap/utils/types.py
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
"""
의회 크롤링 결과를 나타내는 타입을 정의합니다.
"""
@@ -6,35 +6,42 @@
from typing import Optional, List
from dataclasses import dataclass
+
class CouncilType(str, Enum):
"""
의회의 종류를 나타내는 열거형입니다.
"""
- LOCAL_COUNCIL = "local_council"
+
+ LOCAL_COUNCIL = "local_council"
NATIONAL_COUNCIL = "national_council"
METROPOLITAN_COUNCIL = "metropolitan_council"
"""
기초의회
"""
+
def __str__(self):
"""
JSON으로 직렬화하기 위해 문자열로 변환하는 함수를 오버라이드합니다.
"""
return str(self.value)
+
@dataclass
class Councilor:
"""
의원(이름 및 정당)을 나타내는 타입입니다.
"""
+
name: str
party: str
+
@dataclass
class ScrapResult:
"""
의회 크롤링 결과를 나타내는 타입입니다.
"""
+
council_id: str
"""
의회를 구분하기 위한 문자열입니다.
@@ -50,23 +57,26 @@ class ScrapResult:
class ScrapBasicArgument:
- '''
+ """
scrap_basic에 쓸 argument입니다
- '''
- def __init__(self,
- pf_elt: str | None = None,
- pf_cls: str | None = None,
- pf_memlistelt: str | None = None,
- pf_memlistcls: str | None = None,
- name_elt: str | None = None,
- name_cls: str | None = None,
- name_wrapelt: str | None = None,
- name_wrapcls: str | None = None,
- pty_elt: str | None = None,
- pty_cls: str | None = None,
- pty_wrapelt: str | None = None,
- pty_wrapcls: str | None = None,
- pty_wraptxt: str | None = None):
+ """
+
+ def __init__(
+ self,
+ pf_elt: str | None = None,
+ pf_cls: str | None = None,
+ pf_memlistelt: str | None = None,
+ pf_memlistcls: str | None = None,
+ name_elt: str | None = None,
+ name_cls: str | None = None,
+ name_wrapelt: str | None = None,
+ name_wrapcls: str | None = None,
+ pty_elt: str | None = None,
+ pty_cls: str | None = None,
+ pty_wrapelt: str | None = None,
+ pty_wrapcls: str | None = None,
+ pty_wraptxt: str | None = None,
+ ):
"""
ScrapBasicArgument 클래스의 생성자입니다.
@@ -97,4 +107,4 @@ def __init__(self,
self.pty_cls = pty_cls
self.pty_wrapelt = pty_wrapelt
self.pty_wrapcls = pty_wrapcls
- self.pty_wraptxt = pty_wraptxt
\ No newline at end of file
+ self.pty_wraptxt = pty_wraptxt
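
Because CouncilType mixes in str, json.dumps writes members as their plain values with no custom encoder; the __str__ override in the class above additionally makes str() and f-string interpolation yield "local_council" rather than "CouncilType.LOCAL_COUNCIL". A small sketch, assuming the package imports resolve:

    import dataclasses
    import json

    from scrap.utils.types import CouncilType, Councilor, ScrapResult

    result = ScrapResult(
        council_id="demo",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=[Councilor(name="김영희", party="더불어민주당")],
    )
    print(json.dumps(dataclasses.asdict(result), ensure_ascii=False))
    # {"council_id": "demo", "council_type": "local_council",
    #  "councilors": [{"name": "김영희", "party": "더불어민주당"}]}
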
diff --git a/scrap/utils/utils.py b/scrap/utils/utils.py
index 025ac0e..ef37957 100644
--- a/scrap/utils/utils.py
+++ b/scrap/utils/utils.py
@@ -1,20 +1,22 @@
from scrap.utils.requests import get_soup
+
def getPartyList():
"""
중앙선거관리위원회에서 제공하는 정당 목록을 가져옵니다.
"""
- url = 'https://www.nec.go.kr/site/nec/ex/bbs/List.do?cbIdx=1239'
+ url = "https://www.nec.go.kr/site/nec/ex/bbs/List.do?cbIdx=1239"
soup = get_soup(url)
- table = soup.find('table', class_='list type2')
+ table = soup.find("table", class_="list type2")
partyList = []
- for tr in table.find('tbody').find_all('tr'):
- td = tr.find_all('td')
-        if td[0].get_text(strip=True).split("\n")[0] == '시도':
+ for tr in table.find("tbody").find_all("tr"):
+ td = tr.find_all("td")
+        if td[0].get_text(strip=True).split("\n")[0] == "시도":
continue
# 더불어민주당(민주당, 더민주) 등은 약자가 괄호 안에 있다.
        partyList.append(td[0].get_text(strip=True).split("\n")[0].split("(")[0])
return partyList
-if __name__ == '__main__':
- print(getPartyList())
\ No newline at end of file
+
+if __name__ == "__main__":
+ print(getPartyList())
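
getPartyList works on the first line of each table cell's text: split("\n")[0] drops any trailing lines, the "시도" (province) comparison skips header rows, and split("(")[0] strips the abbreviation the NEC lists in parentheses. The string handling in isolation, on an invented cell value:

    cell_text = "더불어민주당(민주당, 더민주)\n2014-03-26"  # illustrative cell contents
    first_line = cell_text.split("\n")[0]
    print(first_line.split("(")[0])  # 더불어민주당
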