
Commit a632845
refactor(backend): type annotations, minor caching thing
davidpomerenke committed Aug 25, 2024
1 parent 0f96879 commit a632845
Showing 3 changed files with 7 additions and 5 deletions.

@@ -70,6 +70,7 @@ def _story_list_all_pages(
     collection_ids: list[int] | None = None,
     platform: Platform = "onlinenews-mediacloud",
     sample_frac: float = 1,
+    verbose: bool = False,
 ):
     all_stories = []
     more_stories = True
@@ -95,9 +96,10 @@ def _story_list_all_pages(
             )
         else:
             dt = end_date
-        print(
-            f"retrieved metadata for {len(all_stories)} stories for month {start_date.year}-{start_date.month}, currently at {dt}"
-        )
+        if verbose:
+            print(
+                f"retrieved metadata for {len(all_stories)} stories for month {start_date.year}-{start_date.month}, currently at {dt}"
+            )
     # https://github.com/mediacloud/api-tutorial-notebooks/blob/main/MC02%20-%20attention.ipynb:
     # > As you may have noted, this can take a while for long time periods. If you look closely you'll notice that it can't be easily parallelized, because it requires content in the results to make the next call. A workaround is to divide you query up by time and query in parallel for something like each day. This can speed up the response. Also just contact us directly if you are trying to do larger data dumps, or hit up against your API quota.
     # take a 1% sample of stories
Expand All @@ -118,6 +120,7 @@ def _slice_date_range(start: date, end: date) -> list[tuple[date, date]]:
return result


@cache
def _story_list_split_monthly(
query: str,
start_date: date,
Expand Down
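
The hunks above make the progress print opt-in via a new verbose flag and put @cache on the monthly splitter _story_list_split_monthly. The quoted Media Cloud note explains why the splitter exists at all: paginated story listing cannot be parallelized within one window, so the query is divided up by time. As a rough illustration of that idea only (the body of the repository's _slice_date_range is not shown in this diff), a monthly slicer matching the signature from the hunk header could look like this:

# Illustrative sketch only -- it reproduces the _slice_date_range signature
# from the hunk header above: split a date range into per-month windows that
# can be fetched independently (and, per the quoted note, in parallel).
from datetime import date, timedelta


def slice_date_range(start: date, end: date) -> list[tuple[date, date]]:
    """Split [start, end] into consecutive (month_start, month_end) windows."""
    result: list[tuple[date, date]] = []
    current = date(start.year, start.month, 1)
    while current <= end:
        # first day of the following month
        if current.month == 12:
            next_month = date(current.year + 1, 1, 1)
        else:
            next_month = date(current.year, current.month + 1, 1)
        result.append((max(current, start), min(next_month - timedelta(days=1), end)))
        current = next_month
    return result


# slice_date_range(date(2024, 1, 15), date(2024, 3, 10))
# -> [(2024-01-15, 2024-01-31), (2024-02-01, 2024-02-29), (2024-03-01, 2024-03-10)]
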
@@ -5,7 +5,6 @@
 from media_impact_monitor.util.cache import cache


-@cache
 def get_sentiment_trend(q: TrendSearch) -> tuple[pd.DataFrame | None, list[str]]:
     """
     Retrieves the sentiment trend for a given query and start date.
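
Here the @cache decorator is removed from the top-level get_sentiment_trend, while the first file gains it on _story_list_split_monthly: the "minor caching thing" from the commit message moves caching from the whole trend request down to the monthly split. The behavior of media_impact_monitor.util.cache.cache is not shown in this diff, so the following stand-alone sketch uses functools.lru_cache and a hypothetical fetch_month helper purely to illustrate why caching the finer-grained call lets overlapping requests share work:

# Minimal sketch, not the project's code: functools.lru_cache stands in for
# media_impact_monitor.util.cache.cache, and fetch_month is a hypothetical
# per-month fetch. With the cache on the fine-grained call, two requests that
# overlap in time reuse the months they share, instead of only hitting the
# cache on an exact repeat of the whole request.
from functools import lru_cache


@lru_cache(maxsize=None)
def fetch_month(query: str, year: int, month: int) -> int:
    print(f"fetching {query} {year}-{month:02d}")  # runs once per (query, month)
    return 0  # placeholder for an expensive API call


def trend(query: str, months: tuple[tuple[int, int], ...]) -> list[int]:
    return [fetch_month(query, y, m) for y, m in months]


trend("climate", ((2024, 1), (2024, 2)))  # fetches Jan and Feb
trend("climate", ((2024, 2), (2024, 3)))  # Feb comes from the cache, only Mar is fetched
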
2 changes: 1 addition & 1 deletion backend-python/media_impact_monitor/util/parallel.py
@@ -9,7 +9,7 @@ def parallel_tqdm(
     n_jobs: int = 8,
     backend: str = "loky",
     **kwargs,
-):
+) -> list:
     """Parallelize a function with a tqdm progress bar."""
     total = total or len(iter)
     results = Parallel(n_jobs=n_jobs, return_as="generator", backend=backend)(
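
The only change in parallel.py is the new -> list return annotation on parallel_tqdm. The visible lines suggest a thin wrapper around joblib's Parallel(..., return_as="generator") that drains the generator through a tqdm progress bar and collects the results into a list; a sketch along those lines (reconstructed from the hunk, so anything outside the shown lines is a guess) might look like:

# Sketch reconstructed around the lines visible in the hunk above; the
# parameters before n_jobs (func, iter, total) and the delayed(...) call are
# assumptions, since they fall outside the shown lines.
from joblib import Parallel, delayed
from tqdm import tqdm


def parallel_tqdm(
    func,
    iter,  # name kept as in the hunk ("len(iter)"), even though it shadows the builtin
    total: int | None = None,
    n_jobs: int = 8,
    backend: str = "loky",
    **kwargs,
) -> list:
    """Parallelize a function with a tqdm progress bar."""
    total = total or len(iter)
    results = Parallel(n_jobs=n_jobs, return_as="generator", backend=backend)(
        delayed(func)(item, **kwargs) for item in iter
    )
    # Iterating the generator through tqdm reports progress as jobs finish;
    # materializing it into a list matches the new `-> list` annotation.
    return list(tqdm(results, total=total))
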
