Skip to content

Commit

Permalink
fix(impact): do not estimate impact with too little data
Browse files: browse the repository at this point in the history
  • Loading branch information
davidpomerenke committed Jul 21, 2024
1 parent cbe144d commit 6d34c2b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 8 deletions.
9 changes: 8 additions & 1 deletion backend-python/media_impact_monitor/impact.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,13 @@ def get_impact(q: ImpactSearch) -> Impact:
end_date=q.end_date,
)
)
n_event_days = events["date"].nunique()
if n_event_days < 5:
return Impact(
method_applicability=False,
method_limitations=["Not enough events to estimate impact."],
impact_estimates=None,
)
q.impacted_trend.start_date = q.start_date
q.impacted_trend.end_date = q.end_date
trends = get_trend(TrendSearch(**dict(q.impacted_trend)))
Expand All @@ -55,7 +62,7 @@ def get_impact(q: ImpactSearch) -> Impact:
assert (
len(set([str(lims) for lims in lims_list])) == 1
), "All topics should have same limitations."
n_days = 14 - 1
n_days = 7 - 1
return Impact(
method_applicability=applicabilities[0],
method_limitations=lims_list[0],
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,7 @@ def add_emws(df: pd.DataFrame, spans=[1, 2, 7, 30, 90, 365]):
"""Add new columns with exponentially weighted moving averages."""
emws = pd.DataFrame(
{
f"{col}_emw{i}": df.shift(1).ewm(span=i).mean().iloc[:, 0]
f"{col}_emw{i}": df.shift(1).ewm(halflife=i).mean().iloc[:, 0]
for i in spans
for col in df.columns
}
Expand All @@ -46,10 +46,10 @@ def regress(
"""Get regression result where the outcome is `day` days after the treatment."""
lags = range(1, lags + 1)
media_df = pd.DataFrame(media_df, columns=["count"])
protest_df = add_lags(protest_df, lags=lags)
media_df = add_lags(media_df, lags=lags)
# protest_df = add_lags(protest_df, lags=[])
media_df = add_lags(media_df, lags=[4,5,6,7,8])
# protest_df = add_emws(protest_df)
media_df = add_emws(media_df, spans=[7, 30, 90])
# media_df = add_emws(media_df, spans=[14])
df = pd.concat([protest_df, media_df], axis=1)
df = add_weekday_dummies(df)
treatment = "protest"
Expand All @@ -63,16 +63,20 @@ def regress(
else:
df[outcome] = df[outcome].rolling(day + 1).sum()
df = df.dropna()
placebo = False
if placebo:
df[treatment] = df.sample(frac=1)[treatment].to_list()
X = df.drop(columns=[outcome])
y = df[outcome]
model = sm.OLS(y, sm.add_constant(X))
model = model.fit(cov_type="HC3")
alpha = 0.1
return {
"date": day,
"mean": model.params[treatment],
"p": model.pvalues[treatment],
"ci_lower": model.conf_int()[0][treatment],
"ci_upper": model.conf_int()[1][treatment],
"ci_lower": model.conf_int(alpha=alpha)[0][treatment],
"ci_upper": model.conf_int(alpha=alpha)[1][treatment],
}


Expand Down
3 changes: 2 additions & 1 deletion frontend-observable/src/impacts.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,8 @@ let impact = await queryApi('impact', {
media_source: 'news_print',
topic: 'climate_change'
},
organizer: 'Last Generation (Germany)',
organizer: 'Fridays for Future',
start_date: '2020-04-10',
end_date: '2022-04-30'
})
display(impact)
Expand Down

0 comments on commit 6d34c2b

Please sign in to comment.