From 6a7a9a1b2472f8d4a496d5b976ae16be87893b0e Mon Sep 17 00:00:00 2001 From: Mingshi Liu <113382730+mingshl@users.noreply.github.com> Date: Mon, 9 Jan 2023 04:19:15 -0800 Subject: [PATCH] Fix Graph Filter Error in Search (#5665) * fix graph filter out of bound error Signed-off-by: Mingshi Liu * add changelog Signed-off-by: Mingshi Liu * run gradle spotlessApply Signed-off-by: Mingshi Liu * reproduce error in unit test Signed-off-by: Mingshi Liu * format to pass spotlessApply Signed-off-by: Mingshi Liu * organize package Signed-off-by: Mingshi Liu Signed-off-by: Mingshi Liu --- CHANGELOG.md | 2 +- .../opensearch/index/search/MatchQuery.java | 9 +++++- .../index/query/MatchQueryBuilderTests.java | 31 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f35d42e8398ce..2701cb9598ac1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -129,7 +129,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Update thresholds map when cluster manager throttling setting is removed ([#5524](https://github.com/opensearch-project/OpenSearch/pull/5524)) - Fix backward compatibility for static cluster manager throttling threshold setting ([#5633](https://github.com/opensearch-project/OpenSearch/pull/5633)) - Fix index exclusion behavior in snapshot restore and clone APIs ([#5626](https://github.com/opensearch-project/OpenSearch/pull/5626)) - +- Fix graph filter error in search ([#5665](https://github.com/opensearch-project/OpenSearch/pull/5665)) ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.4...HEAD diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index c1ea4427a7d1f..91a4b456bfa2a 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -70,7 +70,6 @@ import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.support.QueryParsers; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -753,6 +752,14 @@ private Query analyzeGraphBoolean(String field, TokenStream source, BooleanClaus lastState = end; final Query queryPos; boolean usePrefix = isPrefix && end == -1; + /** + * check if the GraphTokenStreamFiniteStrings graph is empty + * return empty BooleanQuery result + */ + Iterator graphIt = graph.getFiniteStrings(); + if (!graphIt.hasNext()) { + return builder.build(); + } if (graph.hasSidePath(start)) { final Iterator it = graph.getFiniteStrings(start, end); Iterator queries = new Iterator() { diff --git a/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java index 2e18b1829d542..9fa81a180e70a 100644 --- a/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java @@ -33,6 +33,7 @@ package org.opensearch.index.query; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.tests.analysis.CannedBinaryTokenStream; import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; import org.apache.lucene.index.Term; @@ -52,6 +53,7 @@ import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest; import org.opensearch.common.ParsingException; import org.opensearch.common.Strings; @@ -72,6 +74,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Iterator; import static org.hamcrest.CoreMatchers.either; import static org.hamcrest.CoreMatchers.instanceOf; @@ -326,6 +329,20 @@ public void testFuzzinessOnNonStringField() throws Exception { query.toQuery(context); // no exception } + public void testMatchQueryWithNoSidePath() throws Exception { + QueryShardContext testContext = createShardContext(); + final MatchQuery testMatchQuery = new MatchQuery(testContext); + MockGraphAnalyzer mockAnalyzer = new MockGraphAnalyzer(createGiantGraphWithNoSide()); + testMatchQuery.setAnalyzer(mockAnalyzer); + testMatchQuery.setAutoGenerateSynonymsPhraseQuery(true); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(mockAnalyzer.getTokenStream()); + Iterator graphIt = graph.getFiniteStrings(); + assertEquals(graphIt.hasNext(), false); + String testField = "Gas Lift Storage Bed Frame with Arched Bed Head in King"; + String testValue = "head board, bed head, bedhead, headboard"; + testMatchQuery.parse(Type.BOOLEAN, testField, testValue); // no exception + } + public void testDefaultFuzziness() { MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null); assertNull(matchQueryBuilder.fuzziness()); @@ -542,12 +559,18 @@ private static class MockGraphAnalyzer extends Analyzer { MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens) { this.tokenStream = new CannedBinaryTokenStream(tokens); + } @Override protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(r -> {}, tokenStream); } + + public CannedBinaryTokenStream getTokenStream() { + return this.tokenStream; + } + } /** @@ -570,6 +593,14 @@ private static CannedBinaryTokenStream.BinaryToken[] createGiantGraph(int numPos return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]); } + /** + * Creates a graph token stream with no side path. + **/ + private static CannedBinaryTokenStream.BinaryToken[] createGiantGraphWithNoSide() { + List tokens = new ArrayList<>(); + return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]); + } + /** * Creates a graph token stream with {@link BooleanQuery#getMaxClauseCount()} * expansions at the last position.