From 7d8b6b077a3d89f9b9ab7dab5ec5aaf08d350dbb Mon Sep 17 00:00:00 2001 From: David Pomerenke <46022183+davidpomerenke@users.noreply.github.com> Date: Wed, 7 Aug 2024 21:43:37 +0200 Subject: [PATCH] chore: enable cron again --- backend-python/media_impact_monitor/cron.py | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/backend-python/media_impact_monitor/cron.py b/backend-python/media_impact_monitor/cron.py index 30b803b3..1cea259d 100644 --- a/backend-python/media_impact_monitor/cron.py +++ b/backend-python/media_impact_monitor/cron.py @@ -41,7 +41,7 @@ def fill_cache(): print("Filling cache...") errors = [] events = {} - for data_source in ["acled", "press_releases"]: + for data_source in ["acled"]: # , "press_releases"]: print(f"Retrieving {data_source} events...") try: events[data_source] = get_events( @@ -75,20 +75,23 @@ def fill_cache(): ) events = events["acled"] # TODO: include press_releases recent_events = events[events["date"] >= date.today() - timedelta(days=70)] - # for event in tqdm( - # list(recent_events.itertuples()), - # desc="Retrieving event fulltexts", - # ): - # try: - # get_fulltexts( - # FulltextSearch( - # media_source="news_online", - # event_id=event.event_id, - # ) - # ) - # except Exception as e: - # errors.append(f"fulltexts {event.event_id}: {e}") + for event in tqdm( + list(recent_events.itertuples()), + desc="Retrieving event fulltexts", + ): + try: + get_fulltexts( + FulltextSearch( + media_source="news_online", + event_id=event.event_id, + ) + ) + except Exception as e: + errors.append(f"fulltexts {event.event_id}: {e}") if errors: raise ValueError(f"Errors occurred: {'; '.join(errors)}") print("Successfully filled cache!") return + + +fill_cache()