Skip to content

Commit

Permalink
feat: lower kmeans niters for bigger data sizes (#129)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bclavie authored Feb 11, 2024
1 parent 599077f commit 9ef207d
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion ragatouille/models/colbert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -338,6 +338,13 @@ def index(
self.config, ColBERTConfig(nbits=nbits)
)

+ if len(self.collection) > 100000:
+ self.config.kmeans_niters = 4
+ elif len(self.collection) > 50000:
+ self.config.kmeans_niters = 10
+ else:
+ self.config.kmeans_niters = 20
+
# Instruct colbert-ai to disable forking if nranks == 1
self.config.avoid_fork_if_possible = True
self.indexer = Indexer(
Expand Down Expand Up @@ -468,7 +475,7 @@ def search(
"score": score,
"rank": rank - 1 if zero_index_ranks else rank,
"document_id": document_id,
- "passage_id": id_
+ "passage_id": id_,
}

if self.docid_metadata_map is not None:
Expand Down

0 comments on commit 9ef207d

Please sign in to comment.