Skip to content

Commit

Permalink
style(fulltext pipeline): align tqdms a bit more
Browse files — browse the repository at this point in the history
  • Loading branch information
davidpomerenke committed Aug 4, 2024
1 parent 629059e commit cfd979b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -132,10 +132,11 @@ def func(start_and_end):
sample_frac=sample_frac,
)

label = "Downloading metadata by month"
stories_lists = parallel_tqdm(
func,
_slice_date_range(start_date, end_date),
desc="Downloading metadata by month",
desc=f"{label:<{40}}",
n_jobs=8,
)
stories = [s for sl in stories_lists for s in sl]
Expand Down Expand Up @@ -169,13 +170,11 @@ def get_mediacloud_fulltexts(
sample_frac=sample_frac,
)
df = df[~df["url"].str.contains("news.de")]
responses = parallel_tqdm(
get, df["url"].tolist(), desc="Downloading fulltexts", n_jobs=8
)
label = "Downloading fulltexts"
responses = parallel_tqdm(get, df["url"].tolist(), desc=f"{label:<{40}}", n_jobs=8)
urls_and_responses = list(zip(df["url"], responses))
df["text"] = parallel_tqdm(
_extract, urls_and_responses, desc="Extracting fulltexts"
)
label = "Extracting fulltexts"
df["text"] = parallel_tqdm(_extract, urls_and_responses, desc=f"{label:<{40}}")
df = df.dropna(subset=["text"]).rename(columns={"publish_date": "date"})
df = df[
[
Expand Down
5 changes: 2 additions & 3 deletions backend-python/media_impact_monitor/fulltext_coding.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,9 +146,8 @@ async def code_fulltext(text: str) -> dict | None:

async def code_many_fulltexts_async(texts: list[str]) -> list[dict | None]:
acompletions = [code_fulltext(text) for text in texts]
completions = await tqdm_asyncio.gather(
*acompletions, desc="Coding sentiment of fulltexts with AI"
)
label = "Coding sentiment of fulltexts with AI"
completions = await tqdm_asyncio.gather(*acompletions, desc=f"{label:<{40}}")
return completions


Expand Down

0 comments on commit cfd979b

Please sign in to comment.