Skip to content

Commit

Permalink
add FBR support, improve comments/variable names, check for empty query
Browse files Browse the repository at this point in the history
  • Loading branch information
cmacdonald committed Aug 21, 2024
1 parent 7a156cc commit 82733e7
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 13 deletions.
35 changes: 27 additions & 8 deletions pyterrier/terrier/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def _retrieve_one(self, row, input_results=None, docno_provided=False, docid_pro
qid = str(row.qid)

# row is a namedtuple, whose fields are exposed in _fields
query_toks_present = 'query_toks' in row._fields
query_toks_present : bool = 'query_toks' in row._fields
if query_toks_present:
query = '' # Clear the query so it doesn't match the "applypipeline:off" or "_matchop" conditions below... The query_toks query is converted below.
srq = self.manager.newSearchRequest(qid)
Expand Down Expand Up @@ -345,7 +345,10 @@ def _retrieve_one(self, row, input_results=None, docno_provided=False, docid_pro
srq.setControl("parseql", "off")
srq.setControl("matchopql", "on")

if query_toks_present:
if query_toks_present:
if len(row.query_toks) == 0:
warn("Skipping empty query_toks for qid %s" % qid)
return []
srq.setControl("terrierql", "off")
srq.setControl("parsecontrols", "off")
srq.setControl("parseql", "off")
Expand Down Expand Up @@ -745,12 +748,17 @@ def transform(self, queries):
newscores=[]
for row in pt.tqdm(queries.itertuples(), desc=str(self), total=queries.shape[0], unit="q") if self.verbose else queries.itertuples():
qid = str(row.qid)
query = row.query
if len(query) == 0:
warn("Skipping empty query for qid %s" % qid)
continue

srq = self.manager.newSearchRequest(qid, query)
query_toks_present : bool = 'query_toks' in row._fields
if query_toks_present:
query = ''
srq = self.manager.newSearchRequest(qid)
# we'll parse query_toks below
else:
query = row.query
if len(query) == 0:
warn("Skipping empty query for qid %s" % qid)
continue
srq = self.manager.newSearchRequest(qid, query)

for control, value in self.controls.items():
srq.setControl(control, str(value))
Expand All @@ -767,6 +775,17 @@ def transform(self, queries):
srq.setControl("parseql", "off")
srq.setControl("matchopql", "on")

if query_toks_present:
if len(row.query_toks) == 0:
warn("Skipping empty query_toks for qid %s" % qid)
return []
srq.setControl("terrierql", "off")
srq.setControl("parsecontrols", "off")
srq.setControl("parseql", "off")
srq.setControl("matchopql", "on")
query = _querytoks2matchop(row.query_toks)
srq.setOriginalQuery(query)

# this handles the case that a candidate set of documents has been set.
if docno_provided or docid_provided:
# we use RequestContextMatching to make a ResultSet from the
Expand Down
4 changes: 3 additions & 1 deletion tests/test_br.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def test_br_query_toks(self):

self.assertEqual(len(result_terrier), len(result_matchop))
self.assertEqual(len(result_terrier), len(result_toks))

from pandas.testing import assert_frame_equal
assert_frame_equal(result_terrier[["qid", "docno", "score", "rank"]], result_matchop[["qid", "docno", "score", "rank"]])
assert_frame_equal(result_terrier[["qid", "docno", "score", "rank"]], result_toks[["qid", "docno", "score", "rank"]])

def test_br_cutoff_stability(self):
indexloc = self.here + "/fixtures/index/data.properties"
Expand Down
23 changes: 19 additions & 4 deletions tests/test_fbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,6 @@ def test_fbr_reranking2(self):
result1F_map = { row.docno : row.feature0 for row in result1.itertuples() }
result2_map = { row.docno : row.score for row in result2.itertuples() }

print(result1F_map)
print(result2_map)


# check features scores
# NB: places can go no less than 4, as two documents have similar PL2 scores
for rank, row in enumerate(result0.itertuples()):
Expand Down Expand Up @@ -141,6 +137,25 @@ def test_fbr(self):
retrBasic = pt.terrier.Retriever(indexref)
if "matching" in retrBasic.controls:
self.assertNotEqual(retrBasic.controls["matching"], "FatFeaturedScoringMatching,org.terrier.matching.daat.FatFull")

def test_fbr_query_toks(self):
    """Check that FeaturesRetriever gives the same results for three query forms.

    The same weighted two-term query is issued as a Terrier query-language
    string, as a matchop ``#combine`` string, and as a ``query_toks`` dict
    passed through ``transform()``. The three result frames must have the
    same length and be equal on the compared columns.
    """
    index_path = self.here + "/fixtures/index/data.properties"
    fbr = pt.terrier.FeaturesRetriever(index_path, ["WMODEL:PL2"], wmodel="DPH")

    # Form 1: query string using ^weight syntax.
    res_terrier = fbr.search('applytermpipeline:off chemic^2 reaction^0.5')

    # Form 2: matchop #combine expression carrying the same weights.
    res_matchop = fbr.search('#combine:0=2:1=0.5(chemic reaction)')

    # Form 3: term -> weight mapping supplied in a query_toks column.
    topics = pd.DataFrame(
        [['1', {'chemic': 2, 'reaction': 0.5}]],
        columns=['qid', 'query_toks'],
    )
    res_toks = fbr.transform(topics)

    alternatives = (res_matchop, res_toks)

    # Cheap length checks first, so a size mismatch fails with a short message.
    for candidate in alternatives:
        self.assertEqual(len(res_terrier), len(candidate))

    from pandas.testing import assert_frame_equal
    compared_cols = ["qid", "docno", "score", "rank", "features"]
    for candidate in alternatives:
        assert_frame_equal(res_terrier[compared_cols], candidate[compared_cols])

def test_fbr_example(self):
JIR = pt.java.autoclass('org.terrier.querying.IndexRef')
Expand Down

0 comments on commit 82733e7

Please sign in to comment.