From 7a80108dfe4c438355342207f4dabdb04fa00d90 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Fri, 11 Oct 2024 15:32:06 +0200
Subject: [PATCH] Simplify leaf slice calculation

No need to go through the indirection of 2 wrapped functions, just put
the logic in plain methods. Also, we can just outright set the field if
there's no executor.
---
 .../apache/lucene/search/IndexSearcher.java   | 129 +++++++-----------
 1 file changed, 50 insertions(+), 79 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 9c7274da3eb..894b47565e5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -27,7 +27,6 @@
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.Executor;
-import java.util.function.Function;
 import java.util.function.Supplier;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -115,13 +114,7 @@ public class IndexSearcher {
   protected final IndexReaderContext readerContext;
   protected final List<LeafReaderContext> leafContexts;
 
-  /**
-   * Used with executor - LeafSlice supplier where each slice holds a set of leafs executed within
-   * one thread. We are caching it instead of creating it eagerly to avoid calling a protected
-   * method from constructor, which is a bad practice. Always non-null, regardless of whether an
-   * executor is provided or not.
-   */
-  private final Supplier<LeafSlice[]> leafSlicesSupplier;
+  private volatile LeafSlice[] leafSlices;
 
   // Used internally for load balancing threads executing for the query
   private final TaskExecutor taskExecutor;
@@ -230,20 +223,18 @@ public IndexSearcher(IndexReaderContext context, Executor executor) {
         executor == null ? new TaskExecutor(Runnable::run) : new TaskExecutor(executor);
     this.readerContext = context;
     leafContexts = context.leaves();
-    Function<List<LeafReaderContext>, LeafSlice[]> slicesProvider =
-        executor == null
-            ? leaves ->
-                leaves.isEmpty()
-                    ? new LeafSlice[0]
-                    : new LeafSlice[] {
-                      new LeafSlice(
-                          new ArrayList<>(
-                              leaves.stream()
-                                  .map(LeafReaderContextPartition::createForEntireSegment)
-                                  .toList()))
-                    }
-            : this::slices;
-    leafSlicesSupplier = new CachingLeafSlicesSupplier(slicesProvider, leafContexts);
+    if (executor == null) {
+      leafSlices =
+          leafContexts.isEmpty()
+              ? new LeafSlice[0]
+              : new LeafSlice[] {
+                new LeafSlice(
+                    new ArrayList<>(
+                        leafContexts.stream()
+                            .map(LeafReaderContextPartition::createForEntireSegment)
+                            .toList()))
+              };
+    }
   }
 
   /**
@@ -540,7 +531,43 @@ public int count(Query query) throws IOException {
    * @lucene.experimental
    */
   public final LeafSlice[] getSlices() {
-    return leafSlicesSupplier.get();
+    LeafSlice[] res = leafSlices;
+    if (res == null) {
+      res = computeAndCacheSlices();
+    }
+    return res;
+  }
+
+  private synchronized LeafSlice[] computeAndCacheSlices() {
+    LeafSlice[] res = leafSlices;
+    if (res == null) {
+      res = slices(leafContexts);
+      /*
+       * Enforce that there aren't multiple leaf partitions within the same leaf slice pointing to the
+       * same leaf context. It is a requirement that {@link Collector#getLeafCollector(LeafReaderContext)}
+       * gets called once per leaf context. Also, it does not make sense to partition a segment to then search
+       * those partitions as part of the same slice, because the goal of partitioning is parallel searching
+       * which happens at the slice level.
+ */ + for (LeafSlice leafSlice : res) { + if (leafSlice.partitions.length <= 1) { + continue; + } + enforceDistinctLeaves(leafSlice); + } + leafSlices = res; + } + return res; + } + + private static void enforceDistinctLeaves(LeafSlice leafSlice) { + Set distinctLeaves = new HashSet<>(); + for (LeafReaderContextPartition leafPartition : leafSlice.partitions) { + if (distinctLeaves.add(leafPartition.ctx) == false) { + throw new IllegalStateException( + "The same slice targets multiple leaf partitions of the same leaf reader context. A physical segment should rather get partitioned to be searched concurrently from as many slices as the number of leaf partitions it is split into."); + } + } } /** @@ -1169,60 +1196,4 @@ public TooManyNestedClauses() { + IndexSearcher.getMaxClauseCount()); } } - - /** - * Supplier for {@link LeafSlice} slices which computes and caches the value on first invocation - * and returns cached value on subsequent invocation. If the passed in provider for slice - * computation throws exception then same will be passed to the caller of this supplier on each - * invocation. If the provider returns null then {@link NullPointerException} will be thrown to - * the caller. - * - *

-   * <p>NOTE: To provide thread safe caching mechanism this class is implementing the (subtle) double-checked locking
-   * idiom
-   */
-  private static class CachingLeafSlicesSupplier implements Supplier<LeafSlice[]> {
-    private volatile LeafSlice[] leafSlices;
-
-    private final Function<List<LeafReaderContext>, LeafSlice[]> sliceProvider;
-
-    private final List<LeafReaderContext> leaves;
-
-    private CachingLeafSlicesSupplier(
-        Function<List<LeafReaderContext>, LeafSlice[]> provider, List<LeafReaderContext> leaves) {
-      this.sliceProvider = Objects.requireNonNull(provider, "leaf slice provider cannot be null");
-      this.leaves = Objects.requireNonNull(leaves, "list of LeafReaderContext cannot be null");
-    }
-
-    @Override
-    public LeafSlice[] get() {
-      if (leafSlices == null) {
-        synchronized (this) {
-          if (leafSlices == null) {
-            leafSlices =
-                Objects.requireNonNull(
-                    sliceProvider.apply(leaves), "slices computed by the provider is null");
-            /*
-             * Enforce that there aren't multiple leaf partitions within the same leaf slice pointing to the
-             * same leaf context. It is a requirement that {@link Collector#getLeafCollector(LeafReaderContext)}
-             * gets called once per leaf context. Also, it does not make sense to partition a segment to then search
-             * those partitions as part of the same slice, because the goal of partitioning is parallel searching
-             * which happens at the slice level.
-             */
-            for (LeafSlice leafSlice : leafSlices) {
-              Set<LeafReaderContext> distinctLeaves = new HashSet<>();
-              for (LeafReaderContextPartition leafPartition : leafSlice.partitions) {
-                distinctLeaves.add(leafPartition.ctx);
-              }
-              if (leafSlice.partitions.length != distinctLeaves.size()) {
-                throw new IllegalStateException(
-                    "The same slice targets multiple leaf partitions of the same leaf reader context. A physical segment should rather get partitioned to be searched concurrently from as many slices as the number of leaf partitions it is split into.");
-              }
-            }
-          }
-        }
-      }
-      return leafSlices;
-    }
-  }
 }
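
Reviewer note on the concurrency pattern: the patch keeps the double-checked locking semantics of the removed CachingLeafSlicesSupplier, it just spells them out as a volatile field plus a synchronized compute method instead of a dedicated Supplier class. Below is a minimal self-contained sketch of that idiom, not Lucene API; the names LazyCache, cached, and computeAndCache are illustrative:

```java
import java.util.function.Supplier;

/** Illustrative stand-in for the pattern used by the patch; not Lucene API. */
final class LazyCache<T> {
  private volatile T cached;
  private final Supplier<T> compute;

  LazyCache(Supplier<T> compute) {
    this.compute = compute;
  }

  T get() {
    T res = cached; // exactly one volatile read on the hot path
    if (res == null) {
      res = computeAndCache(); // slow path, contended at most once
    }
    return res;
  }

  private synchronized T computeAndCache() {
    T res = cached; // re-check under the lock: another thread may have published already
    if (res == null) {
      res = compute.get();
      cached = res; // volatile write publishes the fully constructed value
    }
    return res;
  }
}
```

Copying the volatile field into a local before the null check means the common case costs a single volatile read. The constructor change complements this: when no executor is given, the single all-leaves slice is assigned eagerly, so the lazy path, and the call to the protected slices() method it guards, is never taken (the old javadoc's reason for deferring to a supplier was precisely to avoid calling that overridable method from the constructor).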
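Reviewer note on enforceDistinctLeaves: the removed supplier added every partition's context to a set and compared sizes after the loop; the new helper fails on the first duplicate instead, since Set#add returns false when the element is already present, and it skips slices with at most one partition, which cannot contain duplicates. A small sketch of the fail-fast shape under those assumptions; DistinctCheck and requireDistinct are illustrative names:

```java
import java.util.HashSet;
import java.util.List;
import java.util.Set;

final class DistinctCheck {
  /** Throws on the first duplicate rather than counting after the loop. */
  static <T> void requireDistinct(Iterable<T> items) {
    Set<T> seen = new HashSet<>();
    for (T item : items) {
      if (seen.add(item) == false) { // add() reports the duplicate directly
        throw new IllegalStateException("duplicate element: " + item);
      }
    }
  }

  public static void main(String[] args) {
    requireDistinct(List.of("a", "b", "c")); // passes
    try {
      requireDistinct(List.of("a", "b", "a")); // trips on the second "a"
    } catch (IllegalStateException expected) {
      System.out.println(expected.getMessage());
    }
  }
}
```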