Skip to content

Commit

Permalink
Have byte[] vectors also trigger a timeout in ExitableDirectoryReader (
Browse files Browse the repository at this point in the history
…apache#12423)

`ExitableDirectoryReader` did not wrap nearest-neighbor searches over `byte[]` vectors. Consequently, timeouts were not respected by this reader when searching with `byte[]` vectors.

This commit fixes that bug.
  • Loading branch information
benwtrent authored Jul 7, 2023
1 parent 8611530 commit d03c8f1
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 1 deletion.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ Bug Fixes

* GITHUB#12413: Fix HNSW graph search bug that potentially leaked unapproved docs (Ben Trent).

* GITHUB#12423: Respect timeouts in ExitableDirectoryReader when searching with byte[] vectors (Ben Trent).

Other
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,37 @@ public int length() {
return in.searchNearestVectors(field, target, k, timeoutCheckingAcceptDocs, visitedLimit);
}

/**
 * Delegates a {@code byte[]} nearest-vector search to the wrapped reader, substituting an
 * accept-docs filter that periodically polls the query timeout.
 *
 * <p>The timeout is polled on the first call to the filter's {@code get} and on every 16th call
 * after that, via {@code checkAndThrowForSearchVectors()}.
 *
 * @param field the vector field to search
 * @param target the query vector
 * @param k the number of nearest neighbors requested
 * @param acceptDocs the documents eligible to match, or {@code null} to accept every document
 * @param visitedLimit passed through unchanged to the wrapped reader
 * @return whatever the wrapped reader's search returns
 * @throws IOException if the wrapped reader throws it
 */
@Override
public TopDocs searchNearestVectors(
    String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
  // A null acceptDocs (no deleted docs) leaves nothing to hook the timeout check onto, so
  // fall back to a match-all filter that spans every doc of this reader.
  final Bits delegateBits;
  if (acceptDocs != null) {
    delegateBits = acceptDocs;
  } else {
    delegateBits = new Bits.MatchAllBits(maxDoc());
  }

  final Bits timeoutAwareBits =
      new Bits() {
        private static final int TIMEOUT_CHECK_INTERVAL = 16;
        private int invocations;

        @Override
        public int length() {
          return delegateBits.length();
        }

        @Override
        public boolean get(int index) {
          // Count the call first, then poll the timeout only once per interval.
          final boolean pollTimeout = invocations % TIMEOUT_CHECK_INTERVAL == 0;
          invocations++;
          if (pollTimeout) {
            checkAndThrowForSearchVectors();
          }
          return delegateBits.get(index);
        }
      };

  return in.searchNearestVectors(field, target, k, timeoutAwareBits, visitedLimit);
}

private void checkAndThrowForSearchVectors() {
if (queryTimeout.shouldExit()) {
throw new ExitingReaderException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
Expand Down Expand Up @@ -404,7 +405,7 @@ public void testDocValues() throws IOException {
directory.close();
}

public void testVectorValues() throws IOException {
public void testFloatVectorValues() throws IOException {
Directory directory = newDirectory();
IndexWriter writer =
new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
Expand Down Expand Up @@ -485,6 +486,81 @@ public void testVectorValues() throws IOException {
directory.close();
}

/**
 * Indexes random byte vectors, deletes a few documents, then reads and searches the vectors
 * through an {@code ExitableDirectoryReader} configured with a randomly chosen timeout.
 *
 * <p>If the chosen timeout reports that it should exit, both iterating the byte vector values
 * and running a nearest-vector search must throw {@code ExitingReaderException}. Otherwise both
 * operations must complete without throwing.
 */
public void testByteVectorValues() throws IOException {
  Directory dir = newDirectory();
  IndexWriter indexWriter =
      new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

  // Keep the order of these random draws stable so seeded runs stay reproducible.
  final int docCount = atLeast(20);
  final int deleteCount = atMost(5);
  final int dims = atLeast(3);

  for (int docId = 0; docId < docCount; docId++) {
    byte[] vector = new byte[dims];
    random().nextBytes(vector);

    Document document = new Document();
    document.add(new KnnByteVectorField("vector", vector, VectorSimilarityFunction.COSINE));
    document.add(new StringField("id", Integer.toString(docId), Field.Store.YES));
    indexWriter.addDocument(document);
  }

  indexWriter.forceMerge(1);
  indexWriter.commit();

  // Delete the first few docs by id after the merge and commit above.
  for (int docId = 0; docId < deleteCount; docId++) {
    indexWriter.deleteDocuments(new Term("id", Integer.toString(docId)));
  }

  indexWriter.close();

  final QueryTimeout queryTimeout =
      random().nextBoolean() ? immediateQueryTimeout() : infiniteQueryTimeout();

  DirectoryReader directoryReader = DirectoryReader.open(dir);
  DirectoryReader exitableReader = new ExitableDirectoryReader(directoryReader, queryTimeout);
  IndexReader reader = new TestReader(getOnlyLeafReader(exitableReader));

  final LeafReader leaf = reader.leaves().get(0).reader();

  if (!queryTimeout.shouldExit()) {
    // No timeout: iteration and search must both run to completion.
    DocIdSetIterator values = leaf.getByteVectorValues("vector");
    scanAndRetrieve(leaf, values);

    byte[] query = TestVectorUtil.randomVectorBytes(dims);
    leaf.searchNearestVectors("vector", query, 5, leaf.getLiveDocs(), Integer.MAX_VALUE);
  } else {
    // Timed out: iterating the vector values must abort...
    expectThrows(
        ExitingReaderException.class,
        () -> {
          DocIdSetIterator values = leaf.getByteVectorValues("vector");
          scanAndRetrieve(leaf, values);
        });

    // ...and so must the nearest-vector search.
    expectThrows(
        ExitingReaderException.class,
        () -> {
          byte[] query = TestVectorUtil.randomVectorBytes(dims);
          leaf.searchNearestVectors("vector", query, 5, leaf.getLiveDocs(), Integer.MAX_VALUE);
        });
  }

  reader.close();
  dir.close();
}

private static void scanAndRetrieve(LeafReader leaf, DocIdSetIterator iter) throws IOException {
for (iter.nextDoc();
iter.docID() != DocIdSetIterator.NO_MORE_DOCS && iter.docID() < leaf.maxDoc(); ) {
Expand Down

0 comments on commit d03c8f1

Please sign in to comment.