From 94ef0cd0644827b5ef61cae23dfdfbb0c284e6f2 Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Thu, 27 Jul 2023 04:15:05 +1200 Subject: [PATCH] Add k-NN guide and samples (#586) Signed-off-by: Thomas Farr --- USER_GUIDE.md | 190 +----------- guides/plugins/knn.md | 288 ++++++++++++++++++ samples/build.gradle.kts | 39 +++ .../org/opensearch/client/samples/Main.java | 39 +++ .../opensearch/client/samples/RandUtil.java | 23 ++ .../client/samples/SampleClient.java | 71 +++++ .../client/samples/knn/KnnBasics.java | 113 +++++++ .../client/samples/knn/KnnBooleanFilter.java | 159 ++++++++++ .../samples/knn/KnnEfficientFilter.java | 174 +++++++++++ .../client/samples/knn/KnnPainlessScript.java | 117 +++++++ .../client/samples/knn/KnnScriptScore.java | 119 ++++++++ samples/src/main/resources/log4j2.xml | 12 + settings.gradle.kts | 1 + 13 files changed, 1168 insertions(+), 177 deletions(-) create mode 100644 guides/plugins/knn.md create mode 100644 samples/build.gradle.kts create mode 100644 samples/src/main/java/org/opensearch/client/samples/Main.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/RandUtil.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/SampleClient.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/knn/KnnBooleanFilter.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/knn/KnnEfficientFilter.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/knn/KnnPainlessScript.java create mode 100644 samples/src/main/java/org/opensearch/client/samples/knn/KnnScriptScore.java create mode 100644 samples/src/main/resources/log4j2.xml diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 4a5a26c611..bbc609ffbf 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -1,11 +1,9 @@ -# User Guide - - [User Guide](#user-guide) - [Sample data](#sample-data) - [IndexData 
class](#indexdata-class) - [Create a client](#create-a-client) - - [Create a client using `RestClientTransport`](#create-a-client-using-restclienttransport) - [Create a client using `ApacheHttpClient5Transport`](#create-a-client-using-apachehttpclient5transport) + - [Create a client using `RestClientTransport` (deprecated)](#create-a-client-using-restclienttransport-deprecated) - [Create an index](#create-an-index) - [Create an index with default settings](#create-an-index-with-default-settings) - [Create an index with custom settings and mappings](#create-an-index-with-custom-settings-and-mappings) @@ -13,9 +11,6 @@ - [Search for the documents](#search-for-the-documents) - [Get raw JSON results](#get-raw-json-results) - [Search documents using a match query](#search-documents-using-a-match-query) - - [Search documents using k-NN](#search-documents-using-k-nn) - - [Exact k-NN with scoring script](#exact-k-nn-with-scoring-script) - - [Exact k-NN with painless scripting extension](#exact-k-nn-with-painless-scripting-extension) - [Search documents using suggesters](#search-documents-using-suggesters) - [App Data class](#app-data-class) - [Using completion suggester](#using-completion-suggester) @@ -39,8 +34,11 @@ - [Cat aliases](#cat-aliases) - [Cat nodes](#cat-nodes) - [Cat point in time segments](#cat-point-in-time-segments) -- [Using different transport options](#using-different-transport-options) - - [Amazon OpenSearch Service](#amazon-opensearch-service) + - [Using different transport options](#using-different-transport-options) + - [Amazon OpenSearch Service](#amazon-opensearch-service) + - [Plugins](#plugins) + +# User Guide ## Sample data @@ -217,172 +215,6 @@ for (int i = 0; i < searchResponse.hits().hits().size(); i++) { } ``` -## Search documents using k-NN - -### Exact k-NN with scoring script - -1. 
Create index with custom mapping - -```java -String index = "my-knn-index-1"; -TypeMapping mapping = new TypeMapping.Builder() - .properties("my_vector", new Property.Builder() - .knnVector(new KnnVectorProperty.Builder() - .dimension(4) - .build()) - .build()) - .build(); -CreateIndexRequest createIndexRequest = new CreateIndexRequest.Builder() - .index(index) - .mappings(mapping) - .build(); -client.indices().create(createIndexRequest); -``` - -2. Index documents - -```java -JsonObject doc1 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(1.5).add(5.5).add(4.5).add(6.4).build()) - .add("price", 10.3) - .build(); -JsonObject doc2 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(2.5).add(3.5).add(5.6).add(6.7).build()) - .add("price", 5.5) - .build(); -JsonObject doc3 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(4.5).add(5.5).add(6.7).add(3.7).build()) - .add("price", 4.4) - .build(); - -ArrayList operations = new ArrayList<>(); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("1").document(doc1)) - ).build()); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("2").document(doc2)) - ).build()); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("3").document(doc3)) - ).build()); - -BulkRequest bulkRequest = new BulkRequest.Builder() - .index(index) - .operations(operations) - .build(); -client.bulk(bulkRequest); -``` - -3. Search documents using k-NN script score (_This implementation utilizes `com.fasterxml.jackson.databind.JsonNode` as the target document class, which is not part of the OpenSearch Java library. 
However, any document class that matches the searched data can be used instead._) - -```java -InlineScript inlineScript = new InlineScript.Builder() - .source("knn_score") - .lang("knn") - .params(Map.of( - "field", JsonData.of("my_vector"), - "query_value", JsonData.of(List.of(1.5, 5.5, 4.5, 6.4)), - "space_type", JsonData.of("cosinesimil") - )) - .build(); -Query query = new Query.Builder() - .scriptScore(new ScriptScoreQuery.Builder() - .query(new Query.Builder() - .matchAll(new MatchAllQuery.Builder().build()) - .build()) - .script(new Script.Builder() - .inline(inlineScript) - .build()) - .build()) - .build(); -SearchRequest searchRequest = new SearchRequest.Builder() - .index(index) - .query(query) - .build(); -SearchResponse searchResponse = client.search(searchRequest, JsonNode.class); -``` - -### Exact k-NN with painless scripting extension - -1. Create index with custom mapping - -```java -String index = "my-knn-index-1"; -TypeMapping mapping = new TypeMapping.Builder() - .properties("my_vector", new Property.Builder() - .knnVector(new KnnVectorProperty.Builder() - .dimension(4) - .build()) - .build()) - .build(); -CreateIndexRequest createIndexRequest = new CreateIndexRequest.Builder() - .index(index) - .mappings(mapping) - .build(); -client.indices().create(createIndexRequest); -``` - -2. 
Index documents - -```java -JsonObject doc1 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(1.5).add(5.5).add(4.5).add(6.4).build()) - .add("price", 10.3) - .build(); -JsonObject doc2 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(2.5).add(3.5).add(5.6).add(6.7).build()) - .add("price", 5.5) - .build(); -JsonObject doc3 = Json.createObjectBuilder() - .add("my_vector", Json.createArrayBuilder().add(4.5).add(5.5).add(6.7).add(3.7).build()) - .add("price", 4.4) - .build(); - -ArrayList operations = new ArrayList<>(); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("1").document(doc1)) - ).build()); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("2").document(doc2)) - ).build()); -operations.add(new BulkOperation.Builder().index( - IndexOperation.of(io -> io.index(index).id("3").document(doc3)) - ).build()); - -BulkRequest bulkRequest = new BulkRequest.Builder() - .index(index) - .operations(operations) - .build(); -client.bulk(bulkRequest); -``` - -3. Search documents using k-NN with painless scripting extension (_This implementation utilizes `com.fasterxml.jackson.databind.JsonNode` as the target document class, which is not part of the OpenSearch Java library. 
However, any document class that matches the searched data can be used instead._) - -```java -InlineScript inlineScript = new InlineScript.Builder() - .source("1.0 + cosineSimilarity(params.query_value, doc[params.field])") - .params(Map.of( - "field", JsonData.of("my_vector"), - "query_value", JsonData.of(List.of(1.5, 5.5, 4.5, 6.4)) - )) - .build(); -Query query = new Query.Builder() - .scriptScore(new ScriptScoreQuery.Builder() - .query(new Query.Builder() - .matchAll(new MatchAllQuery.Builder().build()) - .build()) - .script(new Script.Builder() - .inline(inlineScript) - .build()) - .build()) - .build(); -SearchRequest searchRequest = new SearchRequest.Builder() - .index(index) - .query(query) - .build(); -SearchResult searchResult = client.search(searchRequest, JsonNode.class); -``` - ## Search documents using suggesters ### App Data class @@ -647,7 +479,7 @@ DeleteIndexResponse deleteIndexResponse = client.indices().delete(deleteIndexReq ## Data Stream API -### Create a data stream +### Create a data stream Before creating a data stream, you need to create an index template which configures a set of indices as a data stream. A data stream must have a timestamp field. If not specified, OpenSearch uses `@timestamp` as the default timestamp field name. @@ -760,9 +592,9 @@ SegmentsResponse pitSegmentsResponse = javaClient().cat() .pitSegments(r -> r.headers("index,shard,id,segment,size")); ``` -# Using different transport options +## Using different transport options -## Amazon OpenSearch Service +### Amazon OpenSearch Service Requests to [OpenSearch Service and OpenSearch Serverless](https://docs.aws.amazon.com/opensearch-service/index.html) must be signed using the AWS signing protocol. Use `AwsSdk2Transport` to send signed requests. 
@@ -784,3 +616,7 @@ System.out.println(info.version().distribution() + ": " + info.version().number( httpClient.close(); ``` + +## Plugins + +- [k-NN](guides/plugins/knn.md) \ No newline at end of file diff --git a/guides/plugins/knn.md b/guides/plugins/knn.md new file mode 100644 index 0000000000..a2c3db72b8 --- /dev/null +++ b/guides/plugins/knn.md @@ -0,0 +1,288 @@ +- [k-NN Plugin](#k-nn-plugin) + - [Basic Approximate k-NN](#basic-approximate-k-nn) + - [Create an Index](#create-an-index) + - [Index Vectors](#index-vectors) + - [Search for Nearest Neighbors](#search-for-nearest-neighbors) + - [Approximate k-NN with a Boolean Filter](#approximate-k-nn-with-a-boolean-filter) + - [Approximate k-NN with an Efficient Filter](#approximate-k-nn-with-an-efficient-filter) + - [Exact k-NN with a scoring script](#exact-k-nn-with-a-scoring-script) + - [Exact k-NN with the Painless scripting extensions](#exact-k-nn-with-the-painless-scripting-extensions) + +# k-NN Plugin + +Short for k-nearest neighbors, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. See the [plugin's documentation](https://opensearch.org/docs/latest/search-plugins/knn/index/) for more information. + +## Basic Approximate k-NN + +In the following example we create a 5-dimensional k-NN index with random data. You can find a synchronous version of this working sample in [samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java](../../samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java). 
+ +```bash +$ ./gradlew :samples:run -Dsamples.mainClass=knn.KnnBasics + +[Main] INFO - Running main class: org.opensearch.client.samples.knn.KnnBasics +[KnnBasics] INFO - Server: opensearch@2.7.0 +[KnnBasics] INFO - Creating index my-index +[KnnBasics] INFO - Indexing 10 vectors +[KnnBasics] INFO - Waiting for indexing to finish +[KnnBasics] INFO - Searching for vector [0.67, 0.67, 0.37, 0.0, 0.72] +[KnnBasics] INFO - Found {values=[0.32, 0.96, 0.41, 0.04, 0.9]} with score 0.8050233 +[KnnBasics] INFO - Found {values=[0.04, 0.58, 0.13, 0.27, 0.37]} with score 0.6031363 +[KnnBasics] INFO - Found {values=[0.96, 0.88, 0.8, 0.41, 0.18]} with score 0.5640794 +[KnnBasics] INFO - Deleting index my-index +``` + +### Create an Index + +```java +final var indexName = "my-index"; +final var dimensions = 5; + +client.indices().create(r -> r + .index(indexName) + .settings(s -> s.knn(true)) + .mappings(m -> m + .properties("values", p -> p + .knnVector(k -> k.dimension(dimensions))))); +``` + +### Index Vectors + +Given the following document class definition: + +```java +public static class Doc { + private float[] values; + + public Doc() {} + + public Doc(float[] values) { + this.values = values; + } + + public static Doc rand(int dimensions) { + var values = new float[dimensions]; + for (var i = 0; i < dimensions; ++i) { + values[i] = Math.round(Math.random() * 100.0) / 100.0f; + } + return new Doc(values); + } + + // Getters/Setters & toString elided +} +``` + +Create 10 random vectors and insert them using the bulk API: + +```java +final var nVectors = 10; +var bulkRequest = new BulkRequest.Builder(); +for (var i = 0; i < nVectors; ++i) { + var id = Integer.toString(i); + var doc = Doc.rand(dimensions); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(doc))); +} + +client.bulk(bulkRequest.build()); + +client.indices().refresh(i -> i.index(indexName)); +``` + +### Search for Nearest Neighbors + +Create a random vector of the same 
size and search for its nearest neighbors. + +```java +final var searchVector = new float[dimensions]; +for (var i = 0; i < dimensions; ++i) { + searchVector[i] = Math.round(Math.random() * 100.0) / 100.0f; +} + +var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .knn(k -> k + .field("values") + .vector(searchVector) + .k(3))), + Doc.class); + +for (var hit : searchResponse.hits().hits()) { + System.out.println(hit.source()); +} +``` + +## Approximate k-NN with a Boolean Filter + +In the [KnnBooleanFilter.java sample](../../samples/src/main/java/org/opensearch/client/samples/knn/KnnBooleanFilter.java) we create a 5-dimensional k-NN index with random data and a `metadata` field that contains a book genre (e.g. `fiction`). The search query is a k-NN search filtered by genre. The filter clause is outside the k-NN query clause and is applied after the k-NN search. + +```java +var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .bool(b -> b + .filter(f -> f + .bool(b2 -> b2 + .must(m -> m + .term(t -> t + .field("metadata.genre") + .value(v -> v.stringValue(searchGenre)))))) + .must(m -> m + .knn(k -> k + .field("values") + .vector(searchVector) + .k(5))))), + Doc.class); +``` + +```bash +$ ./gradlew :samples:run -Dsamples.mainClass=knn.KnnBooleanFilter + +[Main] INFO - Running main class: org.opensearch.client.samples.knn.KnnBooleanFilter +[KnnBooleanFilter] INFO - Server: opensearch@2.7.0 +[KnnBooleanFilter] INFO - Creating index my-index +[KnnBooleanFilter] INFO - Indexing 3000 vectors +[KnnBooleanFilter] INFO - Waiting for indexing to finish +[KnnBooleanFilter] INFO - Searching for vector [0.18, 0.71, 0.44, 0.03, 0.42] with the 'drama' genre +[KnnBooleanFilter] INFO - Found {values=[0.21, 0.58, 0.55, 0.09, 0.45], metadata={genre=drama}} with score 0.966744 +[KnnBooleanFilter] INFO - Deleting index my-index +``` + +## Approximate k-NN with an Efficient Filter + +In the [KnnEfficientFilter.java 
sample](../../samples/src/main/java/org/opensearch/client/samples/knn/KnnEfficientFilter.java) we implement the example in [the k-NN documentation](https://opensearch.org/docs/latest/search-plugins/knn/filter-search-knn/), which creates an index that uses the Lucene engine and HNSW as the method in the mapping, containing hotel location and parking data, then search for the top three hotels near the location with the coordinates `[5, 4]` that are rated between 8 and 10, inclusive, and provide parking. + +```java +var searchResponse = client.search(s -> s + .index(indexName) + .size(3) + .query(q -> q + .knn(k -> k + .field("location") + .vector(searchLocation) + .k(3) + .filter(Query.of(f -> f + .bool(b -> b + .must(m -> m + .range(r -> r + .field("rating") + .gte(JsonData.of(searchRatingMin)) + .lte(JsonData.of(searchRatingMax)))) + .must(m -> m + .term(t -> t + .field("parking") + .value(FieldValue.of(searchParking))))))))), + Hotel.class); +``` + +```bash +$ ./gradlew :samples:run -Dsamples.mainClass=knn.KnnEfficientFilter + +[Main] INFO - Running main class: org.opensearch.client.samples.knn.KnnEfficientFilter +[KnnEfficientFilter] INFO - Server: opensearch@2.7.0 +[KnnEfficientFilter] INFO - Creating index hotels-index +[KnnEfficientFilter] INFO - Indexing hotel {location=[5.2, 4.0], parking=true, rating=5} with id 1 +[KnnEfficientFilter] INFO - Indexing hotel {location=[5.2, 3.9], parking=false, rating=4} with id 2 +[KnnEfficientFilter] INFO - Indexing hotel {location=[4.9, 3.4], parking=true, rating=9} with id 3 +[KnnEfficientFilter] INFO - Indexing hotel {location=[4.2, 4.6], parking=false, rating=6} with id 4 +[KnnEfficientFilter] INFO - Indexing hotel {location=[3.3, 4.5], parking=true, rating=8} with id 5 +[KnnEfficientFilter] INFO - Indexing hotel {location=[6.4, 3.4], parking=true, rating=9} with id 6 +[KnnEfficientFilter] INFO - Indexing hotel {location=[4.2, 6.2], parking=true, rating=5} with id 7 +[KnnEfficientFilter] INFO - Indexing hotel 
{location=[2.4, 4.0], parking=true, rating=8} with id 8 +[KnnEfficientFilter] INFO - Indexing hotel {location=[1.4, 3.2], parking=false, rating=5} with id 9 +[KnnEfficientFilter] INFO - Indexing hotel {location=[7.0, 9.9], parking=true, rating=9} with id 10 +[KnnEfficientFilter] INFO - Indexing hotel {location=[3.0, 2.3], parking=false, rating=6} with id 11 +[KnnEfficientFilter] INFO - Indexing hotel {location=[5.0, 1.0], parking=true, rating=3} with id 12 +[KnnEfficientFilter] INFO - Indexing 12 documents +[KnnEfficientFilter] INFO - Waiting for indexing to finish +[KnnEfficientFilter] INFO - Searching for hotel near [5.0, 4.0] with rating >=8,<=10 and parking=true +[KnnEfficientFilter] INFO - Found {location=[4.9, 3.4], parking=true, rating=9} with score 0.72992706 +[KnnEfficientFilter] INFO - Found {location=[6.4, 3.4], parking=true, rating=9} with score 0.3012048 +[KnnEfficientFilter] INFO - Found {location=[3.3, 4.5], parking=true, rating=8} with score 0.24154587 +[KnnEfficientFilter] INFO - Deleting index hotels-index +``` + +## Exact k-NN with a scoring script + +In the [KnnScriptScore.java sample](../../samples/src/main/java/org/opensearch/client/samples/knn/KnnScriptScore.java) we create a 5-dimensional k-NN index with random data. The search query uses the [k-NN scoring script](https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/) to calculate exact nearest neighbors. 
+ +```java +var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .scriptScore(ss -> ss + .query(qq -> qq.matchAll(m -> m)) + .script(sss -> sss + .inline(i -> i + .source("knn_score") + .lang("knn") + .params("field", JsonData.of("values")) + .params("query_value", JsonData.of(searchVector)) + .params("space_type", JsonData.of("cosinesimil")))))), + Doc.class); +``` + +```bash +$ ./gradlew :samples:run -Dsamples.mainClass=knn.KnnScriptScore + +[Main] INFO - Running main class: org.opensearch.client.samples.knn.KnnScriptScore +[KnnScriptScore] INFO - Server: opensearch@2.7.0 +[KnnScriptScore] INFO - Creating index my-index +[KnnScriptScore] INFO - Indexing 10 vectors +[KnnScriptScore] INFO - Waiting for indexing to finish +[KnnScriptScore] INFO - Searching for vector [0.94, 0.1, 0.39, 0.63, 0.42] +[KnnScriptScore] INFO - Found {values=[0.66, 0.23, 0.15, 0.44, 0.13]} with score 1.9564294 +[KnnScriptScore] INFO - Found {values=[0.94, 0.05, 0.86, 0.68, 0.05]} with score 1.90958 +[KnnScriptScore] INFO - Found {values=[0.88, 0.72, 0.29, 0.48, 0.56]} with score 1.8788767 +[KnnScriptScore] INFO - Found {values=[0.97, 0.99, 0.66, 0.61, 0.91]} with score 1.847905 +[KnnScriptScore] INFO - Found {values=[0.18, 0.29, 0.43, 0.63, 0.25]} with score 1.7819176 +[KnnScriptScore] INFO - Found {values=[0.35, 0.2, 0.62, 0.4, 0.96]} with score 1.7673628 +[KnnScriptScore] INFO - Found {values=[0.34, 0.59, 0.05, 0.47, 0.54]} with score 1.7316635 +[KnnScriptScore] INFO - Found {values=[0.55, 0.98, 0.07, 0.57, 0.06]} with score 1.6385877 +[KnnScriptScore] INFO - Found {values=[0.03, 0.72, 0.89, 0.83, 0.46]} with score 1.6147845 +[KnnScriptScore] INFO - Found {values=[0.17, 0.81, 0.09, 0.21, 0.3]} with score 1.4616101 +[KnnScriptScore] INFO - Deleting index my-index +``` + +## Exact k-NN with the Painless scripting extensions + +In the [KnnPainlessScript.java sample](../../samples/src/main/java/org/opensearch/client/samples/knn/KnnPainlessScript.java) we create a 
5-dimensional k-NN index with random data. The search query uses the [k-NN Painless extensions](https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/) to calculate exact nearest neighbors. + +```java +var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .scriptScore(ss -> ss + .query(qq -> qq.matchAll(m -> m)) + .script(sss -> sss + .inline(i -> i + .source("1.0 + cosineSimilarity(params.query_value, doc[params.field])") + .params("field", JsonData.of("values")) + .params("query_value", JsonData.of(searchVector)))))), + Doc.class); +``` + +```bash +$ ./gradlew :samples:run -Dsamples.mainClass=knn.KnnPainlessScript + +[Main] INFO - Running main class: org.opensearch.client.samples.knn.KnnPainlessScript +[KnnPainlessScript] INFO - Server: opensearch@2.7.0 +[KnnPainlessScript] INFO - Creating index my-index +[KnnPainlessScript] INFO - Indexing 10 vectors +[KnnPainlessScript] INFO - Waiting for indexing to finish +[KnnPainlessScript] INFO - Searching for vector [0.57, 0.86, 0.37, 0.07, 0.38] +[KnnPainlessScript] INFO - Found {values=[1.0, 0.6, 0.66, 0.03, 0.18]} with score 1.8911908 +[KnnPainlessScript] INFO - Found {values=[0.4, 0.39, 0.63, 0.09, 0.39]} with score 1.8776901 +[KnnPainlessScript] INFO - Found {values=[0.32, 0.98, 0.7, 0.7, 0.77]} with score 1.8616674 +[KnnPainlessScript] INFO - Found {values=[0.93, 0.35, 0.27, 0.45, 0.81]} with score 1.789043 +[KnnPainlessScript] INFO - Found {values=[0.81, 0.36, 0.87, 0.78, 0.56]} with score 1.7457235 +[KnnPainlessScript] INFO - Found {values=[0.55, 0.19, 0.61, 0.42, 0.4]} with score 1.743325 +[KnnPainlessScript] INFO - Found {values=[0.12, 0.54, 0.09, 0.83, 0.28]} with score 1.6045148 +[KnnPainlessScript] INFO - Found {values=[0.0, 0.04, 0.63, 0.07, 0.9]} with score 1.479921 +[KnnPainlessScript] INFO - Found {values=[0.41, 0.05, 0.52, 1.0, 0.18]} with score 1.4306322 +[KnnPainlessScript] INFO - Found {values=[0.22, 0.1, 0.59, 0.89, 0.15]} with score 1.4274814 
+[KnnPainlessScript] INFO - Deleting index my-index +``` \ No newline at end of file diff --git a/samples/build.gradle.kts b/samples/build.gradle.kts new file mode 100644 index 0000000000..b8e7339202 --- /dev/null +++ b/samples/build.gradle.kts @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +plugins { + java + application + checkstyle +} + +checkstyle { + toolVersion = "10.0" +} + +java { + targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_11 +} + +dependencies { + implementation(project(":java-client")) + implementation("org.apache.logging.log4j", "log4j-api","[2.17.1,3.0)") + implementation("org.apache.logging.log4j", "log4j-core","[2.17.1,3.0)") + implementation("org.apache.logging.log4j", "log4j-slf4j2-impl","[2.17.1,3.0)") + implementation("commons-logging", "commons-logging", "1.2") + implementation("com.fasterxml.jackson.core", "jackson-databind", "2.15.2") +} + +application { + mainClass.set("org.opensearch.client.samples.Main") +} + +tasks.named<JavaExec>("run") { + systemProperty("samples.mainClass", System.getProperty("samples.mainClass")) +} diff --git a/samples/src/main/java/org/opensearch/client/samples/Main.java b/samples/src/main/java/org/opensearch/client/samples/Main.java new file mode 100644 index 0000000000..f6f9e4ac01 --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/Main.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class Main { + private static final Logger LOGGER = LogManager.getLogger(Main.class); + + public static void main(String[] args) { + var mainClass = System.getProperty("samples.mainClass"); + if (mainClass == null || mainClass.isEmpty()) { + LOGGER.error("Please specify the main class to run with -Dsamples.mainClass=<main-class>
"); + System.exit(1); + } + + if (!mainClass.startsWith("org.")) { + mainClass = Main.class.getPackageName() + "." + mainClass; + } + + LOGGER.info("Running main class: {}", mainClass); + + try { + final var clazz = Class.forName(mainClass); + final var mainMethod = clazz.getMethod("main", String[].class); + mainMethod.invoke(null, (Object) args); + } catch (Exception e) { + LOGGER.error("Failed to run main class", e); + System.exit(1); + } + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/RandUtil.java b/samples/src/main/java/org/opensearch/client/samples/RandUtil.java new file mode 100644 index 0000000000..f6706aa24f --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/RandUtil.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.client.samples; + +public class RandUtil { + public static float[] rand2SfArray(int n) { + var arr = new float[n]; + for (var i = 0; i < n; ++i) { + arr[i] = Math.round(Math.random() * 100.0) / 100.0f; + } + return arr; + } + + public static <T> T choice(T[] arr) { + return arr[(int)Math.floor(Math.random() * arr.length)]; + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/SampleClient.java b/samples/src/main/java/org/opensearch/client/samples/SampleClient.java new file mode 100644 index 0000000000..9b73d9504f --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/SampleClient.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples; + +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import org.apache.hc.client5.http.auth.AuthScope; +import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; +import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; +import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; +import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder; +import org.apache.hc.client5.http.ssl.NoopHostnameVerifier; +import org.apache.hc.core5.http.HttpHost; +import org.apache.hc.core5.ssl.SSLContextBuilder; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; + +public class SampleClient { + public static OpenSearchClient create() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException { + var env = System.getenv(); + var https = Boolean.parseBoolean(env.getOrDefault("HTTPS", "true")); + var hostname = env.getOrDefault("HOST", "localhost"); + var port = Integer.parseInt(env.getOrDefault("PORT", "9200")); + var user = env.getOrDefault("USERNAME", "admin"); + var pass = env.getOrDefault("PASSWORD", "admin"); + + final var hosts = new HttpHost[]{ + new HttpHost(https ? 
"https" : "http", hostname, port) + }; + + final var sslContext = SSLContextBuilder.create() + .loadTrustMaterial(null, (chains, authType) -> true) + .build(); + + final var transport = ApacheHttpClient5TransportBuilder + .builder(hosts) + .setMapper(new JacksonJsonpMapper()) + .setHttpClientConfigCallback(httpClientBuilder -> { + final var credentialsProvider = new BasicCredentialsProvider(); + for (final var host : hosts) { + credentialsProvider.setCredentials( + new AuthScope(host), + new UsernamePasswordCredentials(user, pass.toCharArray())); + } + + // Disable SSL/TLS verification as our local testing clusters use self-signed certificates + final var tlsStrategy = ClientTlsStrategyBuilder.create() + .setSslContext(sslContext) + .setHostnameVerifier(NoopHostnameVerifier.INSTANCE) + .build(); + + final var connectionManager = PoolingAsyncClientConnectionManagerBuilder.create() + .setTlsStrategy(tlsStrategy) + .build(); + + return httpClientBuilder + .setDefaultCredentialsProvider(credentialsProvider) + .setConnectionManager(connectionManager); + }) + .build(); + return new OpenSearchClient(transport); + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java b/samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java new file mode 100644 index 0000000000..c34bcd510b --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/knn/KnnBasics.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples.knn; + +import java.util.Arrays; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.samples.RandUtil; +import org.opensearch.client.samples.SampleClient; + +/** + * Run with: ./gradlew :samples:run -Dsamples.mainClass=knn.KnnBasics + */ +public class KnnBasics { + private static final Logger LOGGER = LogManager.getLogger(KnnBasics.class); + + public static void main(String[] args) { + try { + var client = SampleClient.create(); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + final var indexName = "my-index"; + final var dimensions = 5; + + if (!client.indices().exists(r -> r.index(indexName)).value()) { + LOGGER.info("Creating index {}", indexName); + client.indices().create(r -> r + .index(indexName) + .settings(s -> s.knn(true)) + .mappings(m -> m + .properties("values", p -> p + .knnVector(k -> k.dimension(dimensions))))); + } + + final var nVectors = 10; + var bulkRequest = new BulkRequest.Builder(); + for (var i = 0; i < nVectors; ++i) { + var id = Integer.toString(i); + var doc = Doc.rand(dimensions); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(doc))); + } + + LOGGER.info("Indexing {} vectors", nVectors); + client.bulk(bulkRequest.build()); + + LOGGER.info("Waiting for indexing to finish"); + client.indices().refresh(i -> i.index(indexName)); + + final var searchVector = RandUtil.rand2SfArray(dimensions); + LOGGER.info("Searching for vector {}", searchVector); + + var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .knn(k -> k + .field("values") + .vector(searchVector) + .k(3))), + Doc.class); + + for (var hit : searchResponse.hits().hits()) { + LOGGER.info("Found {} with score {}", hit.source(), hit.score()); + } + + LOGGER.info("Deleting index 
{}", indexName); + client.indices().delete(r -> r.index(indexName)); + } catch (Exception e) { + LOGGER.error("Unexpected exception", e); + } + } + + public static class Doc { + private float[] values; + + public Doc() {} + + public Doc(float[] values) { + this.values = values; + } + + public static Doc rand(int dimensions) { + return new Doc(RandUtil.rand2SfArray(dimensions)); + } + + public float[] getValues() { + return values; + } + + public void setValues(float[] values) { + this.values = values; + } + + @Override + public String toString() { + return "{" + + "values=" + Arrays.toString(values) + + '}'; + } + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/knn/KnnBooleanFilter.java b/samples/src/main/java/org/opensearch/client/samples/knn/KnnBooleanFilter.java new file mode 100644 index 0000000000..9b8121da5e --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/knn/KnnBooleanFilter.java @@ -0,0 +1,159 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples.knn; + +import java.util.Arrays; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.samples.RandUtil; +import org.opensearch.client.samples.SampleClient; + +/** + * Run with: ./gradlew :samples:run -Dsamples.mainClass=knn.KnnBooleanFilter + */ +public class KnnBooleanFilter { + private static final Logger LOGGER = LogManager.getLogger(KnnBooleanFilter.class); + + public static void main(String[] args) { + try { + var client = SampleClient.create(); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + final var indexName = "my-index"; + final var dimensions = 5; + + if (!client.indices().exists(r -> r.index(indexName)).value()) { + LOGGER.info("Creating index {}", indexName); + client.indices().create(r -> r + .index(indexName) + .settings(s -> s.knn(true)) + .mappings(m -> m + .properties("values", p -> p + .knnVector(k -> k.dimension(dimensions))))); + } + + final var nVectors = 3000; + final var genres = new String[] {"fiction", "drama", "romance"}; + var bulkRequest = new BulkRequest.Builder(); + for (var i = 0; i < nVectors; ++i) { + var id = Integer.toString(i); + var doc = Doc.rand(dimensions, genres); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(doc))); + } + + LOGGER.info("Indexing {} vectors", nVectors); + client.bulk(bulkRequest.build()); + + LOGGER.info("Waiting for indexing to finish"); + client.indices().refresh(i -> i.index(indexName)); + + final var searchGenre = RandUtil.choice(genres); + final var searchVector = RandUtil.rand2SfArray(dimensions); + LOGGER.info("Searching for vector {} with the '{}' genre", searchVector, searchGenre); + + var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .bool(b -> b + .filter(f -> f + .bool(b2 -> b2 + 
.must(m -> m + .term(t -> t + .field("metadata.genre") + .value(v -> v.stringValue(searchGenre)))))) + .must(m -> m + .knn(k -> k + .field("values") + .vector(searchVector) + .k(5))))), + Doc.class); + + for (var hit : searchResponse.hits().hits()) { + LOGGER.info("Found {} with score {}", hit.source(), hit.score()); + } + + LOGGER.info("Deleting index {}", indexName); + client.indices().delete(r -> r.index(indexName)); + } catch (Exception e) { + LOGGER.error("Unexpected exception", e); + } + } + + public static class Doc { + private float[] values; + private Metadata metadata; + + public Doc() {} + + public Doc(float[] values, Metadata metadata) { + this.values = values; + this.metadata = metadata; + } + + public static Doc rand(int dimensions, String[] genres) { + return new Doc(RandUtil.rand2SfArray(dimensions), new Metadata(RandUtil.choice(genres))); + } + + public float[] getValues() { + return values; + } + + public void setValues(float[] values) { + this.values = values; + } + + public Metadata getMetadata() { + return metadata; + } + + public void setMetadata(Metadata metadata) { + this.metadata = metadata; + } + + @Override + public String toString() { + return "{" + + "values=" + Arrays.toString(values) + + ", metadata=" + metadata + + '}'; + } + } + + private static class Metadata { + private String genre; + + private Metadata() {} + + private Metadata(String genre) { + this.genre = genre; + } + + public String getGenre() { + return genre; + } + + public void setGenre(String genre) { + this.genre = genre; + } + + @Override + public String toString() { + return "{" + + "genre=" + genre + + '}'; + } + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/knn/KnnEfficientFilter.java b/samples/src/main/java/org/opensearch/client/samples/knn/KnnEfficientFilter.java new file mode 100644 index 0000000000..6f512a0f1d --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/knn/KnnEfficientFilter.java @@ -0,0 +1,174 @@ +/* + * 
SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.client.samples.knn; + +import java.util.Arrays; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.samples.SampleClient; + +/** + * Run with: ./gradlew :samples:run -Dsamples.mainClass=knn.KnnEfficientFilter + */ +public class KnnEfficientFilter { + private static final Logger LOGGER = LogManager.getLogger(KnnEfficientFilter.class); + + public static void main(String[] args) { + try { + var client = SampleClient.create(); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + final var indexName = "hotels-index"; + + if (!client.indices().exists(r -> r.index(indexName)).value()) { + LOGGER.info("Creating index {}", indexName); + client.indices().create(r -> r + .index(indexName) + .settings(s -> s + .knn(true) + .knnAlgoParamEfSearch(100) + .numberOfShards("1") + .numberOfReplicas("0")) + .mappings(m -> m + .properties("location", p -> p + .knnVector(k -> k + .dimension(2) + .method(v -> v + .name("hnsw") + .spaceType("l2") + .engine("lucene") + .parameters("ef_construction", JsonData.of(100)) + .parameters("m", JsonData.of(16))))))); + } + + final var hotels = new Hotel[] { + new Hotel(5.2f, 4.f, true, 5), + new Hotel(5.2f, 3.9f, false, 4), + new Hotel(4.9f, 3.4f, true, 9), + new Hotel(4.2f, 4.6f, false, 6), + new Hotel(3.3f, 4.5f, true, 8), + new Hotel(6.4f, 3.4f, true, 9), + new Hotel(4.2f, 6.2f, true, 5), + new Hotel(2.4f, 4.0f, true, 8), + new Hotel(1.4f, 3.2f, false, 5), + new 
Hotel(7.0f, 9.9f, true, 9), + new Hotel(3.0f, 2.3f, false, 6), + new Hotel(5.0f, 1.0f, true, 3), + }; + var bulkRequest = new BulkRequest.Builder(); + for (var i = 0; i < hotels.length; ++i) { + final var id = Integer.toString(i + 1); + final var hotel = hotels[i]; + LOGGER.info("Indexing hotel {} with id {}", hotel, id); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(hotel))); + } + + LOGGER.info("Indexing {} documents", hotels.length); + client.bulk(bulkRequest.build()); + + LOGGER.info("Waiting for indexing to finish"); + client.indices().refresh(i -> i.index(indexName)); + + final var searchLocation = new float[]{ 5.0f, 4.0f }; + final var searchRatingMin = 8; + final var searchRatingMax = 10; + final var searchParking = true; + LOGGER.info( + "Searching for hotel near {} with rating >={},<={} and parking={}", + searchLocation, searchRatingMin, searchRatingMax, searchParking); + + var searchResponse = client.search(s -> s + .index(indexName) + .size(3) + .query(q -> q + .knn(k -> k + .field("location") + .vector(searchLocation) + .k(3) + .filter(Query.of(f -> f + .bool(b -> b + .must(m -> m + .range(r -> r + .field("rating") + .gte(JsonData.of(searchRatingMin)) + .lte(JsonData.of(searchRatingMax)))) + .must(m -> m + .term(t -> t + .field("parking") + .value(FieldValue.of(searchParking))))))))), + Hotel.class); + + for (var hit : searchResponse.hits().hits()) { + LOGGER.info("Found {} with score {}", hit.source(), hit.score()); + } + + LOGGER.info("Deleting index {}", indexName); + client.indices().delete(r -> r.index(indexName)); + } catch (Exception e) { + LOGGER.error("Unexpected exception", e); + } + } + + public static class Hotel { + private float[] location; + private boolean parking; + private int rating; + + public Hotel() {} + + public Hotel(float locX, float locY, boolean parking, int rating) { + this.location = new float[] {locX, locY}; + this.parking = parking; + this.rating = rating; + } + + public float[] 
getLocation() { + return location; + } + + public void setLocation(float[] location) { + this.location = location; + } + + public boolean isParking() { + return parking; + } + + public void setParking(boolean parking) { + this.parking = parking; + } + + public int getRating() { + return rating; + } + + public void setRating(int rating) { + this.rating = rating; + } + + @Override + public String toString() { + return "{" + + "location=" + Arrays.toString(location) + + ", parking=" + parking + + ", rating=" + rating + + '}'; + } + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/knn/KnnPainlessScript.java b/samples/src/main/java/org/opensearch/client/samples/knn/KnnPainlessScript.java new file mode 100644 index 0000000000..de39b8a82b --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/knn/KnnPainlessScript.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples.knn; + +import java.util.Arrays; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.samples.RandUtil; +import org.opensearch.client.samples.SampleClient; + +/** + * Run with: ./gradlew :samples:run -Dsamples.mainClass=knn.KnnPainlessScript + */ +public class KnnPainlessScript { + private static final Logger LOGGER = LogManager.getLogger(KnnPainlessScript.class); + + public static void main(String[] args) { + try { + var client = SampleClient.create(); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + final var indexName = "my-index"; + final var dimensions = 5; + + if (!client.indices().exists(r -> r.index(indexName)).value()) { + LOGGER.info("Creating index {}", indexName); + client.indices().create(r -> r + .index(indexName) + .settings(s -> s.knn(true)) + .mappings(m -> m + .properties("values", p -> p + .knnVector(k -> k.dimension(dimensions))))); + } + + final var nVectors = 10; + var bulkRequest = new BulkRequest.Builder(); + for (var i = 0; i < nVectors; ++i) { + var id = Integer.toString(i); + var doc = Doc.rand(dimensions); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(doc))); + } + + LOGGER.info("Indexing {} vectors", nVectors); + client.bulk(bulkRequest.build()); + + LOGGER.info("Waiting for indexing to finish"); + client.indices().refresh(i -> i.index(indexName)); + + final var searchVector = RandUtil.rand2SfArray(dimensions); + LOGGER.info("Searching for vector {}", searchVector); + + var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .scriptScore(ss -> ss + .query(qq -> qq.matchAll(m -> m)) + .script(sss -> sss + .inline(i -> i + .source("1.0 + cosineSimilarity(params.query_value, 
doc[params.field])") + .params("field", JsonData.of("values")) + .params("query_value", JsonData.of(searchVector)))))), + Doc.class); + + for (var hit : searchResponse.hits().hits()) { + LOGGER.info("Found {} with score {}", hit.source(), hit.score()); + } + + LOGGER.info("Deleting index {}", indexName); + client.indices().delete(r -> r.index(indexName)); + } catch (Exception e) { + LOGGER.error("Unexpected exception", e); + } + } + + public static class Doc { + private float[] values; + + public Doc() {} + + public Doc(float[] values) { + this.values = values; + } + + public static Doc rand(int dimensions) { + return new Doc(RandUtil.rand2SfArray(dimensions)); + } + + public float[] getValues() { + return values; + } + + public void setValues(float[] values) { + this.values = values; + } + + @Override + public String toString() { + return "{" + + "values=" + Arrays.toString(values) + + '}'; + } + } +} diff --git a/samples/src/main/java/org/opensearch/client/samples/knn/KnnScriptScore.java b/samples/src/main/java/org/opensearch/client/samples/knn/KnnScriptScore.java new file mode 100644 index 0000000000..035aabb3e4 --- /dev/null +++ b/samples/src/main/java/org/opensearch/client/samples/knn/KnnScriptScore.java @@ -0,0 +1,119 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.samples.knn; + +import java.util.Arrays; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.samples.RandUtil; +import org.opensearch.client.samples.SampleClient; + +/** + * Run with: ./gradlew :samples:run -Dsamples.mainClass=knn.KnnScriptScore + */ +public class KnnScriptScore { + private static final Logger LOGGER = LogManager.getLogger(KnnScriptScore.class); + + public static void main(String[] args) { + try { + var client = SampleClient.create(); + + var version = client.info().version(); + LOGGER.info("Server: {}@{}", version.distribution(), version.number()); + + final var indexName = "my-index"; + final var dimensions = 5; + + if (!client.indices().exists(r -> r.index(indexName)).value()) { + LOGGER.info("Creating index {}", indexName); + client.indices().create(r -> r + .index(indexName) + .settings(s -> s.knn(true)) + .mappings(m -> m + .properties("values", p -> p + .knnVector(k -> k.dimension(dimensions))))); + } + + final var nVectors = 10; + var bulkRequest = new BulkRequest.Builder(); + for (var i = 0; i < nVectors; ++i) { + var id = Integer.toString(i); + var doc = Doc.rand(dimensions); + bulkRequest.operations(b -> b + .index(o -> o + .index(indexName) + .id(id) + .document(doc))); + } + + LOGGER.info("Indexing {} vectors", nVectors); + client.bulk(bulkRequest.build()); + + LOGGER.info("Waiting for indexing to finish"); + client.indices().refresh(i -> i.index(indexName)); + + final var searchVector = RandUtil.rand2SfArray(dimensions); + LOGGER.info("Searching for vector {}", searchVector); + + var searchResponse = client.search(s -> s + .index(indexName) + .query(q -> q + .scriptScore(ss -> ss + .query(qq -> qq.matchAll(m -> m)) + .script(sss -> sss + .inline(i -> i + .source("knn_score") + .lang("knn") + .params("field", JsonData.of("values")) + 
.params("query_value", JsonData.of(searchVector)) + .params("space_type", JsonData.of("cosinesimil")))))), + Doc.class); + + for (var hit : searchResponse.hits().hits()) { + LOGGER.info("Found {} with score {}", hit.source(), hit.score()); + } + + LOGGER.info("Deleting index {}", indexName); + client.indices().delete(r -> r.index(indexName)); + } catch (Exception e) { + LOGGER.error("Unexpected exception", e); + } + } + + public static class Doc { + private float[] values; + + public Doc() {} + + public Doc(float[] values) { + this.values = values; + } + + public static Doc rand(int dimensions) { + return new Doc(RandUtil.rand2SfArray(dimensions)); + } + + public float[] getValues() { + return values; + } + + public void setValues(float[] values) { + this.values = values; + } + + @Override + public String toString() { + return "{" + + "values=" + Arrays.toString(values) + + '}'; + } + } +} diff --git a/samples/src/main/resources/log4j2.xml b/samples/src/main/resources/log4j2.xml new file mode 100644 index 0000000000..a388d16da0 --- /dev/null +++ b/samples/src/main/resources/log4j2.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index 2f1f60b93b..6d462291d4 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -32,3 +32,4 @@ rootProject.name = "opensearch-java" include("java-client") +include("samples") \ No newline at end of file