Skip to content

Commit

Permalink
add bm25 as dep
Browse files Browse the repository at this point in the history
  • Loading branch information
dnth committed Dec 3, 2024
1 parent e3ef6af commit 607a1ed
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
28 changes: 14 additions & 14 deletions nbs/bm25_coco-captions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,22 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-12-03 13:45:10.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mRunning BM25 retrieval benchmark\u001b[0m\n",
"\u001b[32m2024-12-03 13:45:10.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.datasets.coco\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mCOCO validation dataset found in data/coco/val2017, skipping download\u001b[0m\n",
"\u001b[32m2024-12-03 13:45:11.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m80\u001b[0m - \u001b[1mTokenizing corpus\u001b[0m\n"
"\u001b[32m2024-12-03 14:19:16.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mRunning BM25 retrieval benchmark\u001b[0m\n",
"\u001b[32m2024-12-03 14:19:16.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.datasets.coco\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mCOCO validation dataset found in data/coco/val2017, skipping download\u001b[0m\n",
"\u001b[32m2024-12-03 14:19:17.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m80\u001b[0m - \u001b[1mTokenizing corpus\u001b[0m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "29b5e9fdf18a492490207b5ff52c0a81",
"model_id": "1735603033a2470dbd43ff1291945485",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -129,7 +129,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "865425dc2e9741fa8dbaea14914f3b9d",
"model_id": "4f37b05ce1f64318bde369abe6e59cdf",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -143,7 +143,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39033e76d4b84270a31a3b8cbcceb454",
"model_id": "35e0240cf8034a7383b8b149ec20f586",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -157,7 +157,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "92a8a1a8fe424a28b96a7c452958abea",
"model_id": "6f6537967a0d4acea0e7a19245d8382c",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -172,13 +172,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-12-03 13:45:11.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mPerforming retrieval\u001b[0m\n"
"\u001b[32m2024-12-03 14:19:17.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mPerforming retrieval\u001b[0m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1fbc29e3bd9a4612abeed9c0a4917c3c",
"model_id": "c9dc899ac4e142ba836069b35fce2056",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -192,7 +192,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ae54bc2681c94a23a37d633b280b9609",
"model_id": "d3aa36358e0d4d56859d27eef5a5f087",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -206,7 +206,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3a1c113851f546219537b2e65bce70f1",
"model_id": "851a3d64913d4008ab5ffc1758c81f58",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -221,7 +221,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-12-03 13:45:12.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mCalculating metrics\u001b[0m\n"
"\u001b[32m2024-12-03 14:19:18.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxretrieval.core\u001b[0m:\u001b[36mrun_benchmark_bm25\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mCalculating metrics\u001b[0m\n"
]
},
{
Expand Down Expand Up @@ -264,7 +264,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ dependencies = [
"sentence-transformers>=3.3.0",
"timm>=1.0.0",
"accelerate>=1.1.0",
"bm25s>=0.2.5",
"pystemmer>=2.2.0.3",
]

[build-system]
Expand Down

0 comments on commit 607a1ed

Please sign in to comment.