# This script shows how to evaluate the latest SPLADEv3 model on the Webis-Touche-2020-v3 dataset.
# The script has been modified from the SPRINT toolkit (https://github.com/thakur-nandan/sprint).
# For more details, refer to (Thakur et al. 2023): https://dl.acm.org/doi/abs/10.1145/3539618.3591902
# SPLADEv3 uses a single BERT-based encoder for both queries and documents.
# SPLADEv3: https://huggingface.co/naver/splade-v3
# For more details, refer to (Lassance et al. 2024): https://arxiv.org/abs/2403.06789
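#
# Optional: a minimal, commented-out sketch of how a SPLADE encoder turns text into a sparse,
# vocabulary-sized vector (log-saturated ReLU over the MLM logits, max-pooled over token
# positions). The sprint toolkit performs this encoding internally, so the snippet below is
# for illustration only; the query string is just a placeholder.
# python - <<'EOF'
# import torch
# from transformers import AutoModelForMaskedLM, AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained("naver/splade-v3")
# model = AutoModelForMaskedLM.from_pretrained("naver/splade-v3")
#
# inputs = tokenizer("what is argument mining?", return_tensors="pt")
# with torch.no_grad():
#     logits = model(**inputs).logits                                  # (1, seq_len, vocab_size)
#
# # SPLADE pooling: log(1 + ReLU(logits)), masked, then max over the sequence dimension
# weights = torch.log1p(torch.relu(logits)) * inputs["attention_mask"].unsqueeze(-1)
# sparse_vec = weights.max(dim=1).values.squeeze(0)                    # one weight per vocab term
#
# top = sparse_vec.topk(10)
# for score, idx in zip(top.values, top.indices):
#     print(tokenizer.convert_ids_to_tokens([idx.item()])[0], round(score.item(), 2))
# EOF
#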
# Parameters:
# You can add multiple GPUs in the `--gpus` parameter for faster inference.
# Add `beir_` before the dataset name in the `--data_name` parameter.
# The dataset will be downloaded into a `datasets/` folder under your current path if it is not already present.
# Add the model checkpoint(s) (query, document) in the `--ckpt_name` parameter.
# Add the `--do_quantization` flag to enable quantization.
# Set `--quantization_method ndigits-round` and `--ndigits 2` to round term weights to two decimal
# places, i.e., integer impact weights after scaling by 100 (see the sketch just below).
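# A minimal sketch of what this quantization amounts to (an illustration based on the flag
# names, not the toolkit's exact implementation): a float term weight is rounded to
# `ndigits` decimals and can then be stored as an integer impact by scaling with 10^ndigits.
# python -c "w = 1.2345; q = round(w, 2); print(q, int(round(q * 100)))"   # -> 1.23 123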
python -m sprint_toolkit.inference.aio \
--encoder_name splade \
--ckpt_name naver/splade-v3 \
--data_name beir_webis_touche2020_v3 \
--output_dir results/webis-touche2020-v3/splade-v3 \
--gpus 0 \
--do_quantization \
--quantization_method ndigits-round \
--ndigits 2 \
--original_query_format beir \
--topic_split test \
--hits 1000
# To reproduce the numbers in the paper, we used the SPLADEv2 max model (naver/splade_v2_max), which is a DistilBERT-based encoder.
# python -m sprint_toolkit.inference.aio \
# --encoder_name splade \
# --ckpt_name naver/splade_v2_max \
# --data_name beir_webis_touche2020_v3 \
# --output_dir results/webis-touche2020-v3/splade_v2_max \
# --gpus 0 \
# --do_quantization \
# --quantization_method ndigits-round \
# --ndigits 2 \
# --original_query_format beir \
# --topic_split test \
# --hits 1000
# You should get the following scores on the Webis-Touche-2020-v3 dataset:
# {
# "nDCG": {
# "NDCG@1": 0.69388,
# "NDCG@2": 0.70572,
# "NDCG@3": 0.70773,
# "NDCG@5": 0.68809,
# "NDCG@10": 0.67889,
# "NDCG@20": 0.58551,
# "NDCG@100": 0.61202,
# "NDCG@1000": 0.71199
# },
# "MAP": {
# "MAP@1": 0.02931,
# "MAP@2": 0.05647,
# "MAP@3": 0.08342,
# "MAP@5": 0.12988,
# "MAP@10": 0.2407,
# "MAP@20": 0.33839,
# "MAP@100": 0.42571,
# "MAP@1000": 0.45074
# },
# "Recall": {
# "Recall@1": 0.02931,
# "Recall@2": 0.05811,
# "Recall@3": 0.08669,
# "Recall@5": 0.13835,
# "Recall@10": 0.27119,
# "Recall@20": 0.40706,
# "Recall@100": 0.62995,
# "Recall@1000": 0.87859
# },
# "Precision": {
# "P@1": 0.83673,
# "P@2": 0.84694,
# "P@3": 0.84354,
# "P@5": 0.81633,
# "P@10": 0.79388,
# "P@20": 0.58776,
# "P@100": 0.19286,
# "P@1000": 0.02863
# },
# "mrr": {
# "MRR@1": 0.83673,
# "MRR@2": 0.89796,
# "MRR@3": 0.89796,
# "MRR@5": 0.90204,
# "MRR@10": 0.90544,
# "MRR@20": 0.90544,
# "MRR@100": 0.90544,
# "MRR@1000": 0.90544
# },
# "hole": {
# "Hole@1": 0.0,
# "Hole@2": 0.0,
# "Hole@3": 0.0,
# "Hole@5": 0.0,
# "Hole@10": 0.0,
# "Hole@20": 0.26735,
# "Hole@100": 0.75245,
# "Hole@1000": 0.95073
# }
# }