Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: send webhooks to slack app when scraping #72

Merged
merged 3 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions configurations/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,11 @@ class EmailSecrets:
sender_email = str(os.getenv("SCRAP_SENDER_EMAIL") or "")
receiver_email = str(os.getenv("SCRAP_RECEIVER_EMAIL") or "")
password = str(os.getenv("SCRAP_EMAIL_PASSWORD") or "")


class WebhookSecrets:
    """
    Keys required to send scrape results through a webhook.
    """

    # Read once at class-definition time from the WEBHOOK_URL environment
    # variable; an unset (or empty) variable yields an empty string.
    webhook_url = str(os.environ.get("WEBHOOK_URL") or "")
61 changes: 41 additions & 20 deletions scrap/utils/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from tqdm import tqdm
from abc import *

from configurations.secrets import WebhookSecrets

from scrap.utils.export import export_results_to_json, export_results_to_txt
from scrap.utils.database import save_to_database
from scrap.utils.types import ScrapResult, ScrapBasicArgument
Expand All @@ -30,6 +32,7 @@
from scrap.local_councils import *
from scrap.metropolitan_council import *
from scrap.national_council import *
from requests import post
from scrap.group_head import *
from requests.exceptions import Timeout

Expand Down Expand Up @@ -65,6 +68,16 @@ def handle_errors(self, cid: int | str, error):
self.parseerror_count += 1
logging.error(f"| {cid} | 오류: {error}")

def send_webhook(self, message: str) -> None:
    """Post ``message`` to the Slack webhook configured in ``WebhookSecrets``.

    The message is sent as the JSON payload ``{"text": message}``.

    If no webhook URL is configured (``WebhookSecrets.webhook_url`` is
    empty), the notification is skipped with a warning so that a missing
    optional setting does not discard scrape results that were already
    collected.

    Args:
        message: Text to deliver to the webhook.

    Raises:
        ValueError: If the webhook endpoint answers with a status code
            other than 200.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # Function-scope import: the visible import lines only bring in
    # `post` and `Timeout` from requests, so make sure the module name
    # itself is bound here. NOTE(review): drop this if the file already
    # has a top-level `import requests`.
    import requests

    webhook_url = WebhookSecrets.webhook_url
    if not webhook_url:
        logging.warning("WEBHOOK_URL is not set; skipping webhook notification.")
        return

    payload = {"text": message}

    # Always bound the wait: without a timeout, requests may block forever
    # on an unresponsive endpoint and stall the whole scraping run.
    response = requests.post(webhook_url, json=payload, timeout=10)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )

@abstractmethod
def run(self) -> Dict[str, ScrapResult]:
    """Abstract entry point that concrete runners must override.

    The base implementation does nothing and returns ``None``; subclasses
    provide the actual run and return a mapping of ``ScrapResult`` values.
    """
Expand Down Expand Up @@ -125,7 +138,7 @@ def run_single(self, cid: int) -> ScrapResult:

return result

def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]:
scrape_results = dict()

for cid in tqdm(cids):
Expand All @@ -137,9 +150,10 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
except Exception as e:
self.handle_errors(cid, e)

logging.info(
f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
)
result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
logging.info(result_summary)
if enable_webhook:
self.send_webhook("지방의회 스크랩 결과\n" + result_summary)

return scrape_results

Expand All @@ -157,7 +171,7 @@ def run_single(self, cid: int) -> ScrapResult:
raise NotImplementedError(f"함수를 찾을 수 없습니다: {function_name}")
return result

def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]:
scrape_results = dict()

for cid in tqdm(cids):
Expand All @@ -169,9 +183,10 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
except Exception as e:
self.handle_errors(cid, e)

logging.info(
f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
)
result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
logging.info(result_summary)
if enable_webhook:
self.send_webhook("광역의회 스크랩 결과\n" + result_summary)

return scrape_results

Expand Down Expand Up @@ -234,23 +249,24 @@ def main(args: Dict[str, str]) -> None:
runner = ScraperFactory(where, runner_kwargs).create_scraper()

cids_to_run = parse_cids(args.get("cids"), where)
enable_webhook = args.get("disable-webhook")
if cids_to_run:
results = runner.run(cids_to_run)
results = runner.run(cids_to_run, enable_webhook)
else:
results = runner.run()

if args.get("update_mongo"):
if args.get("update-mongo"):
for result in results.values():
save_to_database(result)

if args.get("output_store"):
if args.get("output_format") == "json":
export_results_to_json(results, args.get("output_path"), current_time)
elif args.get("output_format") == "txt":
export_results_to_txt(results, args.get("output_path"), current_time)
if args.get("output-store"):
if args.get("output-format") == "json":
export_results_to_json(results, args.get("output-path"), current_time)
elif args.get("output-format") == "txt":
export_results_to_txt(results, args.get("output-path"), current_time)


def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
def parse_cids(cids_str: Optional[str], where: str) -> Optional[Iterable[int]]:
if cids_str and where in ["local", "metro"]:
return [int(cid.strip()) for cid in cids_str.split(",")]
elif where == "metro":
Expand Down Expand Up @@ -280,18 +296,18 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
)
parser.add_argument("-l", "--log_path", help="로그 파일 경로", default="logs")
parser.add_argument(
"-m", "--update_mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true"
"-m", "--update-mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true"
)
parser.add_argument(
"-o", "--output_store", help="스크랩 결과를 로컬에 저장", action="store_true"
"-o", "--output-store", help="스크랩 결과를 로컬에 저장", action="store_true"
)
parser.add_argument(
"--output_format",
"--output-format",
help="스크랩 결과 저장 형식 ('json', 'txt')",
choices=["json", "txt"],
default="json",
)
parser.add_argument("--output_path", help="스크랩 결과 저장 경로", default="output")
parser.add_argument("--output-path", help="스크랩 결과 저장 경로", default="output")
parser.add_argument(
"-c", "--cids", help="스크랩할 의회 ID 목록 (','로 구분, 지방/광역의회만 해당)", default=None
)
Expand All @@ -305,6 +321,11 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
help="지방의회 스크랩 시 사용할 council_args JSON 파일 경로",
default="scrap/utils/scrap_args.json",
)
parser.add_argument(
"--disable-webhook",
help="스크랩 결과 웹훅 전송 비활성화",
action="store_false",
)
args = vars(parser.parse_args())

main(args)