-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcmd_evaluate.py
54 lines (45 loc) · 1.91 KB
/
cmd_evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from preprocessing import create_preprocessor, split_words
from evaluation import generate_qrel, load_topic_tokens
from indexing import create_index_reader, load_document_stats
import gc
import time
# Input locations used by this evaluation script.
topics_filepath = './data/TREC8all/topicsTREC8Adhoc.txt'
index_filepath = 'spimi.index'
stats_filepath = 'spimi.stats'

# Options for the preprocessor that is applied to the topics when they are
# loaded.
# NOTE(review): presumably these must match the settings the index was built
# with -- confirm against the indexing script.
preprocessor_options = {
    'enable_case_folding': True,
    'enable_remove_stop_words': True,
    'enable_stemmer': True,
    'enable_lemmatizer': False,
    'min_length': 2,
}
preprocessor = create_preprocessor(**preprocessor_options)
# Load the topics, passing the configured preprocessor to the loader.
print('Loading topics from', topics_filepath)
topics = load_topic_tokens(topics_filepath, preprocess=preprocessor)
print('Searching', len(topics), 'topics')

# Load the document statistics from the stats file.
print('Loading document stats')
document_stats = load_document_stats(stats_filepath)
print('done')

# Open the index and materialise the reader's output into a list so the same
# in-memory index can be passed to every evaluation run.
print('Loading search index')
# perf_counter() is monotonic, so the reported duration cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
number_of_documents, index_reader_generator = create_index_reader(index_filepath)
index_reader = index_reader_generator()
index = list(index_reader)
print('done in', time.perf_counter() - start, 'seconds')
# Run tag passed to generate_qrel for every run.
run_tag = 'dev-run'

# One entry per evaluation run: (ranking method, extra positional arguments).
# The extra arguments are kept as a tuple so that runs without a parameter
# dict call generate_qrel without that argument at all, leaving whatever
# default generate_qrel declares in effect.
# NOTE(review): 'k1', 'b' and 'k3' look like BM25 tuning parameters; the
# 'bm25va' run deliberately passes no 'b' -- confirm against generate_qrel.
ranking_runs = [
    ('tfidf', ()),
    ('cosine_tfidf', ()),
    ('bm25', ({'k1': 1.2, 'b': 0.75, 'k3': 8.0},)),
    ('bm25va', ({'k1': 1.2, 'k3': 8.0},)),
]

for ranking_method, extra_args in ranking_runs:
    # Each run gets its own '<ranking_method>_results.txt' file name.
    generate_qrel(number_of_documents, index, document_stats, topics,
                  f'{ranking_method}_results.txt',
                  ranking_method, run_tag, *extra_args)
    # Force a garbage collection after every run (presumably to release the
    # previous run's memory before the next one starts).
    gc.collect()