Skip to content

Commit

Permalink
fix(mediacloud_.py): fix fulltexts tests
Browse files Browse the repository at this point in the history
  • Loading branch information
davidpomerenke committed Sep 1, 2024
1 parent dbd4fb6 commit cedfded
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def get_mediacloud_fulltexts(
label = "Extracting fulltexts"
df["text"] = parallel_tqdm(_extract, urls_and_responses, desc=f"{label:<{40}}")
df = df.dropna(subset=["text"]).rename(columns={"publish_date": "date"})
+df = df[(df["date"] >= start_date) & (df["date"] <= end_date)]
df = df[
[
# "id",
Expand Down
10 changes: 5 additions & 5 deletions backend-python/media_impact_monitor/fulltexts_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-from datetime import date, timedelta
+from datetime import date

import pandas as pd
import pytest
Expand Down Expand Up @@ -41,7 +41,7 @@ def test_get_fulltexts_for_event():
media_source="news_online",
event_id=event_id,
),
-sample_frac=0.1,
+sample_frac=1,
)
assert texts is not None
assert len(texts) > 0
Expand Down Expand Up @@ -124,13 +124,13 @@ def test_get_fulltexts_date_range(default_start_date, default_end_date):
start_date=default_start_date,
end_date=default_end_date,
)
-result = get_fulltexts(q, sample_frac=0.001)
+result = get_fulltexts(q, sample_frac=0.01)
assert isinstance(result, pd.DataFrame)
assert not result.empty
assert all(
default_start_date <= date <= default_end_date for date in result["date"]
)
assert "activism_sentiment" in result.columns
assert "policy_sentiment" in result.columns
-assert all(result["activism_sentiment"].isin([-1, 0, 1]))
-assert all(result["policy_sentiment"].isin([-1, 0, 1]))
+assert all(result["activism_sentiment"].isin([-1, 0, 1, None]))
+assert all(result["policy_sentiment"].isin([-1, 0, 1, None]))

0 comments on commit cedfded

Please sign in to comment.