From fc771808d6112b4e0172d7a24253ccc6b9df634d Mon Sep 17 00:00:00 2001 From: Re-st Date: Thu, 30 Nov 2023 23:28:13 +0900 Subject: [PATCH 1/2] [scrap] 205 --- scrap/local_councils/gyeongsang.py | 19 +++++++++++++++++++ scrap/utils/runner_args.json | 2 +- scrap/utils/spreadsheet.py | 8 +++----- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/scrap/local_councils/gyeongsang.py b/scrap/local_councils/gyeongsang.py index 73ccec7..500631f 100644 --- a/scrap/local_councils/gyeongsang.py +++ b/scrap/local_councils/gyeongsang.py @@ -388,6 +388,25 @@ def scrap_204( return ret_local_councilors(cid, councilors) +def scrap_205( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 영양군""" + # TODO : gzip 문제 생기니, selenium으로 대체 + print(url) + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + profile_list = soup.find("div", id="content_box") + for name_tag in profile_list.find_all('h3'): + name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" + ul = name_tag.find_next('ul') + li_party = ul.find('li', string='소속정당') + party = li_party.text.split(' : ')[-1].strip() + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) def scrap_206( url, diff --git a/scrap/utils/runner_args.json b/scrap/utils/runner_args.json index d03af28..d2dbddf 100644 --- a/scrap/utils/runner_args.json +++ b/scrap/utils/runner_args.json @@ -10,7 +10,7 @@ 88, 97, 103, 107, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 132, 134, 140, 142, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 167, 175, 177, 178, 179, 182, 183, 184, 186, 188, 189, 190, 191, 192, 194, - 195, 196, 197, 198, 199, 201, 202, 203, 204, 206, 208, 209, 210, 212, 213, 214, 215, 216, + 195, 196, 197, 198, 199, 201, 202, 203, 204, 205, 206, 208, 209, 210, 212, 213, 214, 215, 216, 217, 218, 219, 220, 222, 223, 224, 226 ], "selenium_basic": [76, 78, 101, 169, 173], diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 194bc91..c50c9d1 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -162,10 +162,8 @@ def scrap_all_local_councils() -> None: 197, 198, 199, - 201, - 202, - 203, - 204, + 201,202, + 203,204,205, 206, 208, 209, @@ -189,7 +187,7 @@ def scrap_all_local_councils() -> None: parse_error_times = 0 timeouts = 0 N = 226 - for n in [204]: + for n in [205]: if n in no_information + error_unsolved: emsg: str = ( ( From b5dd4695fa8d03f6ae627960eefe6e34b967af0b Mon Sep 17 00:00:00 2001 From: Re-st Date: Thu, 30 Nov 2023 14:30:04 +0000 Subject: [PATCH 2/2] Formatted with black --- scrap/local_councils/gyeongsang.py | 10 ++++++---- scrap/utils/spreadsheet.py | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/scrap/local_councils/gyeongsang.py b/scrap/local_councils/gyeongsang.py index 500631f..693bcbf 100644 --- a/scrap/local_councils/gyeongsang.py +++ b/scrap/local_councils/gyeongsang.py @@ -388,6 +388,7 @@ def scrap_204( return ret_local_councilors(cid, councilors) + def scrap_205( url, cid, @@ -399,15 +400,16 @@ def scrap_205( soup = get_soup(url, verify=False) councilors: List[Councilor] = [] profile_list = soup.find("div", id="content_box") - for name_tag in profile_list.find_all('h3'): + for name_tag in profile_list.find_all("h3"): name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" - ul = name_tag.find_next('ul') - li_party = ul.find('li', string='소속정당') - party = li_party.text.split(' : ')[-1].strip() + ul = name_tag.find_next("ul") + li_party = ul.find("li", string="소속정당") + party = li_party.text.split(" : ")[-1].strip() councilors.append(Councilor(name=name, jdName=party)) return ret_local_councilors(cid, councilors) + def scrap_206( url, cid, diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index c50c9d1..70652e2 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -162,8 +162,11 @@ def scrap_all_local_councils() -> None: 197, 198, 199, - 201,202, - 203,204,205, + 201, + 202, + 203, + 204, + 205, 206, 208, 209,