# configurations/secrets.py
class WebhookSecrets:
    """Defines the keys required to send scrape results through a webhook."""

    # Read once when the class body executes; empty string when WEBHOOK_URL
    # is unset or empty.
    webhook_url = str(os.getenv("WEBHOOK_URL") or "")


# scrap/utils/runner.py -- method added next to handle_errors on the scraper class
def send_webhook(self, message: str) -> None:
    """Post ``message`` as a ``{"text": ...}`` JSON payload to the webhook URL.

    The URL is taken from ``WebhookSecrets.webhook_url``. When it is empty the
    notification is skipped with a warning, so that a missing configuration
    value does not abort a run whose scraping has already finished.

    Args:
        message: Text to send as the ``text`` field of the payload.

    Raises:
        ValueError: If the endpoint answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, malformed URL).
    """
    webhook_url = WebhookSecrets.webhook_url
    if not webhook_url:
        logging.warning("WEBHOOK_URL is not set; skipping webhook notification")
        return

    # Imported here so the name `requests` is guaranteed to be bound; the
    # module's top-level imports only bring in `post` and `Timeout`.
    import requests

    # Without an explicit timeout the call may block indefinitely.
    response = requests.post(webhook_url, json={"text": message}, timeout=10)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )
ScrapResult]: pass @@ -136,9 +149,9 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]: except Exception as e: self.handle_errors(cid, e) - logging.info( - f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" - ) + result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" + logging.info(result_summary) + self.send_webhook("지방의회 스크랩 결과\n" + result_summary) return scrape_results @@ -168,9 +181,9 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]: except Exception as e: self.handle_errors(cid, e) - logging.info( - f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" - ) + result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" + logging.info(result_summary) + self.send_webhook("광역의회 스크랩 결과\n" + result_summary) return scrape_results From 025ebd839cf552c1126fb63c8d1c7a4f32a80537 Mon Sep 17 00:00:00 2001 From: keonly Date: Mon, 27 Nov 2023 20:41:10 +0900 Subject: [PATCH 2/2] feat: add option to disable webhook --- scrap/utils/runner.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/scrap/utils/runner.py b/scrap/utils/runner.py index d40a162..c18e335 100644 --- a/scrap/utils/runner.py +++ b/scrap/utils/runner.py @@ -137,7 +137,7 @@ def run_single(self, cid: int) -> ScrapResult: return result - def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]: + def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]: scrape_results = dict() for cid in tqdm(cids): @@ -151,7 +151,8 @@ def run(self, cids: 
Iterable[int]) -> Dict[int, ScrapResult]: result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" logging.info(result_summary) - self.send_webhook("지방의회 스크랩 결과\n" + result_summary) + if enable_webhook: + self.send_webhook("지방의회 스크랩 결과\n" + result_summary) return scrape_results @@ -169,7 +170,7 @@ def run_single(self, cid: int) -> ScrapResult: raise NotImplementedError(f"함수를 찾을 수 없습니다: {function_name}") return result - def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]: + def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]: scrape_results = dict() for cid in tqdm(cids): @@ -183,7 +184,8 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]: result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |" logging.info(result_summary) - self.send_webhook("광역의회 스크랩 결과\n" + result_summary) + if enable_webhook: + self.send_webhook("광역의회 스크랩 결과\n" + result_summary) return scrape_results @@ -239,23 +241,24 @@ def main(args: Dict[str, str]) -> None: runner = ScraperFactory(where, runner_kwargs).create_scraper() cids_to_run = parse_cids(args.get("cids"), where) + enable_webhook = args.get("disable-webhook") if cids_to_run: - results = runner.run(cids_to_run) + results = runner.run(cids_to_run, enable_webhook) else: results = runner.run() - if args.get("update_mongo"): + if args.get("update-mongo"): for result in results.values(): save_to_database(result) - if args.get("output_store"): - if args.get("output_format") == "json": - export_results_to_json(results, args.get("output_path"), current_time) - elif args.get("output_format") == "txt": - export_results_to_txt(results, args.get("output_path"), current_time) + if args.get("output-store"): + if args.get("output-format") == "json": + 
export_results_to_json(results, args.get("output-path"), current_time) + elif args.get("output-format") == "txt": + export_results_to_txt(results, args.get("output-path"), current_time) -def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]: +def parse_cids(cids_str: Optional[str], where: str) -> Optional[Iterable[int]]: if cids_str and where in ["local", "metro"]: return [int(cid.strip()) for cid in cids_str.split(",")] elif where == "metro": @@ -284,18 +287,18 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]: ) parser.add_argument("-l", "--log_path", help="로그 파일 경로", default="logs") parser.add_argument( - "-m", "--update_mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true" + "-m", "--update-mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true" ) parser.add_argument( - "-o", "--output_store", help="스크랩 결과를 로컬에 저장", action="store_true" + "-o", "--output-store", help="스크랩 결과를 로컬에 저장", action="store_true" ) parser.add_argument( - "--output_format", + "--output-format", help="스크랩 결과 저장 형식 ('json', 'txt')", choices=["json", "txt"], default="json", ) - parser.add_argument("--output_path", help="스크랩 결과 저장 경로", default="output") + parser.add_argument("--output-path", help="스크랩 결과 저장 경로", default="output") parser.add_argument( "-c", "--cids", help="스크랩할 의회 ID 목록 (','로 구분, 지방/광역의회만 해당)", default=None ) @@ -309,6 +312,11 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]: help="지방의회 스크랩 시 사용할 council_args JSON 파일 경로", default="scrap/utils/scrap_args.json", ) + parser.add_argument( + "--disable-webhook", + help="스크랩 결과 웹훅 전송 비활성화", + action="store_false", + ) args = vars(parser.parse_args()) main(args)