Skip to content

Commit

Permalink
Delete baseline pvalue from correction method input (#440)
Browse files: browse the repository at this point in the history
Co-authored-by: Jorge Gabín <jorge@linknovate.com>
  • Loading branch information
JorgeGabin and Jorge Gabín authored May 2, 2024
1 parent 82ab4da commit 3807792
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
5 changes: 4 additions & 1 deletion pyterrier/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,11 @@ def _apply_round(measure, value):
for pcol in p_col_names:
pcol_reject = pcol.replace("p-value", "reject")
pcol_corrected = pcol + " corrected"
reject, corrected, _, _ = statsmodels.stats.multitest.multipletests(df[pcol], alpha=correction_alpha, method=correction)
reject, corrected, _, _ = statsmodels.stats.multitest.multipletests(df[pcol].drop(df.index[baseline]), alpha=correction_alpha, method=correction)
insert_pos = df.columns.get_loc(pcol)
# add reject/corrected values for the baseline
reject = np.insert(reject, baseline, False)
corrected = np.insert(corrected, baseline, np.nan)
# add extra columns, put place directly after the p-value column
df.insert(insert_pos+1, pcol_reject, reject)
df.insert(insert_pos+2, pcol_corrected, corrected)
Expand Down
6 changes: 4 additions & 2 deletions tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,15 +363,17 @@ def test_baseline_corrected(self):
dataset = pt.get_dataset("vaswani")
res1 = pt.BatchRetrieve(dataset.get_index(), wmodel="BM25")(dataset.get_topics().head(10))
res2 = pt.BatchRetrieve(dataset.get_index(), wmodel="DPH")(dataset.get_topics().head(10))
for corr in ['hs', 'bonferroni', 'holm-sidak']:
baseline = 0
for corr in ['hs', 'bonferroni', 'hommel']:
df = pt.Experiment(
[res1, res2],
dataset.get_topics().head(10),
dataset.get_qrels(),
eval_metrics=["map", "ndcg"],
baseline=0, correction='hs')
baseline=baseline, correction=corr)
self.assertTrue("map +" in df.columns)
self.assertTrue("map -" in df.columns)
self.assertTrue("map p-value" in df.columns)
self.assertTrue("map p-value corrected" in df.columns)
self.assertTrue("map reject" in df.columns)
self.assertFalse(any(df["map p-value corrected"].drop(df.index[baseline]).isna()))

0 comments on commit 3807792

Please sign in to comment.