Skip to content

Commit

Permalink
Merge pull request #201 from SocialChangeLab/feature/issue-192/fix-fu…
Browse files Browse the repository at this point in the history
…lltext-api-bug-1

Feature/issue 192/fix fulltext api bug 1
  • Loading branch information
davidpomerenke authored Jul 18, 2024
2 parents 53257e1 + 4af70c8 commit 41d4e48
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 12 deletions.
2 changes: 1 addition & 1 deletion backend-python/media_impact_monitor/fulltext_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from media_impact_monitor.util.llm import completion

system_prompt = "You're a sentiment analysis tool. For a given user input, always return the sentiment of the input. Return -1 for negative, 0 for neutral, and 1 for positive. Before you make your decision, reason about the decision."
system_prompt = """You're a sentiment analysis tool. For a given user input, always return the sentiment of the input. Return -1 for negative, 0 for neutral, and 1 for positive. Before you make your decision, reason about the decision. Stick exactly to the specified JSON schema including the "sentiment_reasoning" and "sentiment" fields."""

tools = [
{
Expand Down
15 changes: 10 additions & 5 deletions backend-python/media_impact_monitor/fulltexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,31 @@
def get_fulltexts(q: FulltextSearch) -> pd.DataFrame | None:
assert q.topic or q.organizers or q.query or q.event_id
keywords = load_keywords()
num_filters = sum(
[bool(q.topic), bool(q.organizers), bool(q.query), bool(q.event_id)]
)
if num_filters > 1:
raise ValueError(
"Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
)
if q.topic:
assert q.topic == "climate_change"
assert not q.query and not q.organizers and not q.event_id
assert (
q.topic == "climate_change"
), "Only 'climate_change' is supported as topic."
query = xs(
keywords["climate_science"]
+ keywords["climate_policy"]
+ keywords["climate_urgency"],
q.media_source,
)
if q.organizers:
assert not q.topic and not q.query and not q.event_id
for org in q.organizers:
assert org in climate_orgs, f"Unknown organization: {org}"
orgs = add_quotes(add_aliases(q.organizers))
query = xs_with_ys(orgs, keywords["activism"], q.media_source)
if q.query:
assert not q.topic and not q.organizers and not q.event_id
query = q.query
if q.event_id:
assert not q.topic and not q.query and not q.organizers
events = get_events_by_id([q.event_id])
assert len(events) == 1
event = events.iloc[0]
Expand Down
35 changes: 29 additions & 6 deletions backend-python/media_impact_monitor/fulltexts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,32 @@ def test_get_fulltexts_for_event():
assert (texts["date"] <= date(2024, 5, 18)).all()


# def test_get_mediacloud_fulltexts():
# start_date = date(2024, 5, 20)
# query = '"letzte generation"'
# fulltexts = get_mediacloud_fulltexts(
# query=query, start_date=start_date, countries=["Germany"]
# )
def test_get_fulltexts_with_too_many_params():
    """Combining two filters (topic and event_id) must raise a ValueError."""
    expected_message = (
        "Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
    )
    with pytest.raises(ValueError) as exc_info:
        get_fulltexts(
            FulltextSearch(
                media_source="news_online",
                topic="climate_change",
                start_date=date(2023, 1, 1),
                end_date=date(2024, 1, 31),
                event_id="adb689988aa3e61021da64570bda6d95",
            )
        )
    # Checked after the block exits: code following the raising call inside
    # the `with` body would never run.
    assert str(exc_info.value) == expected_message


def test_get_fulltexts_for_climate_change():
    """A topic-only search returns a non-empty result within the date range."""
    start, end = date(2023, 1, 1), date(2023, 1, 2)
    texts = get_fulltexts(
        FulltextSearch(
            media_source="news_online",
            topic="climate_change",
            start_date=start,
            end_date=end,
        )
    )
    assert texts is not None
    assert len(texts) > 0
    # Iterating a DataFrame yields its column labels, not its rows, so the
    # range check is done on the "date" column itself.
    assert (texts["date"] >= start).all()
    assert (texts["date"] <= end).all()
4 changes: 4 additions & 0 deletions backend-python/media_impact_monitor/types_.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ class PolicySearch(BaseModel):


class FulltextSearch(BaseModel):
"""
You can set parameters for media_source and date_range, and filter by one of the following: topic, organizers, query, or event_id. For now you cannot combine the latter filters, since they all affect the query in different ways.
"""

media_source: MediaSource = Field(
description="The data source for the media data (i.e., online news, print news, etc.)."
)
Expand Down

0 comments on commit 41d4e48

Please sign in to comment.