diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/chroma.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/chroma.adoc index a0fe25f804..6b2d1b7ae3 100644 --- a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/chroma.adoc +++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/chroma.adoc @@ -44,6 +44,12 @@ With hostOrKey=null, the default is 'http://localhost:8000'. CALL apoc.vectordb.chroma.createCollection($host, 'test_collection', 'Cosine', 4, {}) ---- +.Example results +[opts="header"] +|=== +| name | metadata | database | id | tenant +| test_collection | {"size": 4, "hnsw:space": "cosine"} | default_database | 9c046861-f46f-417d-bd01-ca8c9f99aee5 | default_tenant +|=== .Delete a collection (it leverages https://docs.trychroma.com/usage-guide#creating-inspecting-and-deleting-collections[this API]) [source,cypher] @@ -51,6 +57,7 @@ CALL apoc.vectordb.chroma.createCollection($host, 'test_collection', 'Cosine', 4 CALL apoc.vectordb.chroma.deleteCollection($host, '', {}) ---- +which returns an empty result. .Upsert vectors (it leverages https://docs.trychroma.com/usage-guide#adding-data-to-a-collection[this API]) [source,cypher] @@ -63,6 +70,7 @@ CALL apoc.vectordb.qdrant.upsert($host, '', {}) ---- +which returns an empty result. .Get vectors (it leverages https://docs.trychroma.com/usage-guide#querying-a-collection[this API]) [source,cypher] @@ -149,9 +157,12 @@ CALL apoc.vectordb.chroma.query($host, '', +which returns a string that answers the `$question` by leveraging the embeddings of the db vector. + .Delete vectors (it leverages https://docs.trychroma.com/usage-guide#deleting-data-from-a-collection[this API]) [source,cypher] ---- CALL apoc.vectordb.chroma.delete($host, '', [1,2], {}) ---- +which returns an array of strings of deleted ids. 
For example, `["1", "2"]` \ No newline at end of file diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/milvus.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/milvus.adoc new file mode 100644 index 0000000000..971971856c --- /dev/null +++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/milvus.adoc @@ -0,0 +1,269 @@ + += Milvus + +Here is a list of all available Milvus procedures: + +[opts=header, cols="1, 3"] +|=== +| name | description +| apoc.vectordb.milvus.createCollection(hostOrKey, collection, similarity, size, $config) | + Creates a collection, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`. + The default endpoint is `/v2/vectordb/collections/create`. +| apoc.vectordb.milvus.deleteCollection(hostOrKey, collection, $config) | + Deletes a collection with the name specified in the 2nd parameter. + The default endpoint is `/v2/vectordb/collections/drop`. +| apoc.vectordb.milvus.upsert(hostOrKey, collection, vectors, $config) | + Upserts, in the collection with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', metadata: ''}]. + The default endpoint is `/v2/vectordb/entities/upsert`. +| apoc.vectordb.milvus.delete(hostOrKey, collection, ids, $config) | + Delete the vectors with the specified `ids`. + The default endpoint is `/v2/vectordb/entities/delete`. +| apoc.vectordb.milvus.get(hostOrKey, collection, ids, $config) | + Get the vectors with the specified `ids`. + The default endpoint is `/v2/vectordb/entities/get`. +| apoc.vectordb.milvus.query(hostOrKey, collection, vector, filter, limit, $config) | + Retrieve closest vectors to the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter. + The default endpoint is `/v2/vectordb/entities/search`. +| apoc.vectordb.milvus.getAndUpdate(hostOrKey, collection, ids, $config) | + Get the vectors with the specified `ids`. 
+ The default endpoint is `/v2/vectordb/entities/get`; the procedure optionally creates/updates Neo4j entities. +| apoc.vectordb.milvus.queryAndUpdate(hostOrKey, collection, vector, filter, limit, $config) | + Retrieve closest vectors to the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter, and optionally creates/updates Neo4j entities. + The default endpoint is `/v2/vectordb/entities/search`. +|=== + +where the 1st parameter can be a key defined by the apoc config `apoc.milvus.<key>.host=myHost`. +With hostOrKey=null, the default host is 'http://localhost:19530'. + +== Examples + +Here is a list of examples using a local installation on port `19531`. + + +.Create a collection (it leverages https://milvus.io/api-reference/restful/v2.4.x/v2/Collection%20(v2)/Create.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.createCollection('http://localhost:19531', 'test_collection', 'COSINE', 4, {}) +---- + +.Example results +[opts="header"] +|=== +| data | code +| null | 200 +|=== + +.Delete a collection (it leverages https://milvus.io/api-reference/restful/v2.4.x/v2/Collection%20(v2)/Drop.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.deleteCollection('http://localhost:19531', 'test_collection', {}) +---- + +.Example results +[opts="header"] +|=== +| data | code +| null | 200 +|=== + + +.Upsert vectors (it leverages https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Upsert.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.upsert('http://localhost:19531', 'test_collection', + [ + {id: 1, vector: [0.05, 0.61, 0.76, 0.74], metadata: {city: "Berlin", foo: "one"}}, + {id: 2, vector: [0.19, 0.81, 0.75, 0.11], metadata: {city: "London", foo: "two"}} + ], + {}) +---- + +.Example results +[opts="header"] +|=== +| data | code +| {"upsertCount": 2, "upsertId": [1, 2]} | 200 +|=== + + +.Get vectors (it leverages 
https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Get.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.get('http://localhost:19531', 'test_collection', [1,2], {}) +---- + + +.Example results +[opts="header"] +|=== +| score | metadata | id | vector | text | entity +| null | {city: "Berlin", foo: "one"} | null | null | null | null +| null | {city: "London", foo: "two"} | null | null | null | null +| ... +|=== + +.Get vectors with `{allResults: true}` +[source,cypher] +---- +CALL apoc.vectordb.milvus.get('http://localhost:19531', 'test_collection', [1,2], {allResults: true}) +---- + + +.Example results +[opts="header"] +|=== +| score | metadata | id | vector | text | entity +| null | {city: "Berlin", foo: "one"} | 1 | [...] | null | null +| null | {city: "London", foo: "two"} | 2 | [...] | null | null +| ... +|=== + +.Query vectors (it leverages https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Query.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.query('http://localhost:19531', + 'test_collection', + [0.2, 0.1, 0.9, 0.7], + { must: + [ { key: "city", match: { value: "London" } } ] + }, + 5, + {allResults: true}) +---- + + +.Example results +[opts="header"] +|=== +| score | metadata | id | vector | text | entity +| 1 | {city: "Berlin", foo: "one"} | 1 | [...] | null | null +| 0.1 | {city: "London", foo: "two"} | 2 | [...] | null | null +| ... +|=== + + +We can define a mapping, to auto-create one/multiple nodes and relationships, by leveraging the vector metadata. + +For example, if we have created 2 vectors with the above upsert procedures, +we can populate some existing nodes (i.e. 
`(:Test {myId: 'one'})` and `(:Test {myId: 'two'})`): + + +[source,cypher] +---- +CALL apoc.vectordb.milvus.queryAndUpdate('http://localhost:19531', 'test_collection', + [0.2, 0.1, 0.9, 0.7], + {}, + 5, + { mapping: { + embeddingKey: "vect", + nodeLabel: "Test", + entityKey: "myId", + metadataKey: "foo" + } + }) +---- + +which populates the two nodes as: `(:Test {myId: 'one', city: 'Berlin', vect: [vector1]})` and `(:Test {myId: 'two', city: 'London', vect: [vector2]})`, +which will be returned in the `entity` column result. + + +We can also set the mapping configuration `mode` to `CREATE_IF_MISSING` (which creates nodes if they do not exist), `READ_ONLY` (to search for nodes/rels, without making updates) or `UPDATE_EXISTING` (default behavior): + +[source,cypher] +---- +CALL apoc.vectordb.milvus.queryAndUpdate('http://localhost:19531', 'test_collection', + [0.2, 0.1, 0.9, 0.7], + {}, + 5, + { mapping: { + mode: "CREATE_IF_MISSING", + embeddingKey: "vect", + nodeLabel: "Test", + entityKey: "myId", + metadataKey: "foo" + } + }) +---- + +which creates 2 new nodes as above. + +Or, we can populate existing relationships (i.e. `(:Start)-[:TEST {myId: 'one'}]->(:End)` and `(:Start)-[:TEST {myId: 'two'}]->(:End)`): + + +[source,cypher] +---- +CALL apoc.vectordb.milvus.queryAndUpdate('http://localhost:19531', 'test_collection', + [0.2, 0.1, 0.9, 0.7], + {}, + 5, + { mapping: { + embeddingKey: "vect", + relType: "TEST", + entityKey: "myId", + metadataKey: "foo" + } + }) +---- + +which populates the two relationships as: `()-[:TEST {myId: 'one', city: 'Berlin', vect: [vector1]}]-()` +and `()-[:TEST {myId: 'two', city: 'London', vect: [vector2]}]-()`, +which will be returned in the `entity` column result. + + +We can also use mapping with the `apoc.vectordb.milvus.query` procedure, to search for nodes/rels fitting label/type and metadataKey, without making updates +(i.e. equivalent to the `*.queryAndUpdate` procedure with mapping config having `mode: "READ_ONLY"`). 
+ +For example, with the previous relationships, we can execute the following procedure, which just returns the relationships in the column `rel`: + +[source,cypher] +---- +CALL apoc.vectordb.milvus.query('http://localhost:19531', 'test_collection', + [0.2, 0.1, 0.9, 0.7], + {}, + 5, + { mapping: { + embeddingKey: "vect", + relType: "TEST", + entityKey: "myId", + metadataKey: "foo" + } + }) +---- + +[NOTE] +==== +We can use mapping with `apoc.vectordb.milvus.get*` procedures as well. +==== + +[NOTE] +==== +To optimize performance, we can choose what to `YIELD` with the `apoc.vectordb.milvus.query*` and the `apoc.vectordb.milvus.get*` procedures. + +For example, by executing a `CALL apoc.vectordb.milvus.query(...) YIELD metadata, score, id`, the RestAPI request will include {"with_payload": false, "with_vectors": false}, +so that we do not return the other values that we do not need. +==== + +It is possible to execute vector db procedures together with the xref::ml/rag.adoc[apoc.ml.rag] as follows: + +[source,cypher] +---- +CALL apoc.vectordb.milvus.getAndUpdate($host, $collection, [, ], $conf) YIELD node, metadata, id, vector +WITH collect(node) as paths +CALL apoc.ml.rag(paths, $attributes, $question, $confPrompt) YIELD value +RETURN value +---- + +which returns a string that answers the `$question` by leveraging the embeddings of the db vector. 
+ +.Delete vectors (it leverages https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Delete.md[this API]) +[source,cypher] +---- +CALL apoc.vectordb.milvus.delete('http://localhost:19531', 'test_collection', [1,2], {}) +---- + +.Example results +[opts="header"] +|=== +| data | code +| null | 200 +|=== \ No newline at end of file diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/qdrant.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/qdrant.adoc index 073fa9d146..bd671cd3b5 100644 --- a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/qdrant.adoc +++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/qdrant.adoc @@ -45,6 +45,12 @@ With hostOrKey=null, the default is 'http://localhost:6333'. CALL apoc.vectordb.qdrant.createCollection($hostOrKey, 'test_collection', 'Cosine', 4, {}) ---- +.Example results +[opts="header"] +|=== +| result | time | status +| true | 0.094182458 | "ok" +|=== .Delete a collection (it leverages https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/delete_collection[this API]) [source,cypher] @@ -52,6 +58,12 @@ CALL apoc.vectordb.qdrant.createCollection($hostOrKey, 'test_collection', 'Cosin CALL apoc.vectordb.qdrant.deleteCollection($hostOrKey, 'test_collection', {}) ---- +.Example results +[opts="header"] +|=== +| result | time | status +| true | 0.094182458 | "ok" +|=== .Upsert vectors (it leverages https://qdrant.github.io/qdrant/redoc/index.html#tag/points/operation/upsert_points[this API]) [source,cypher] @@ -64,6 +76,12 @@ CALL apoc.vectordb.qdrant.upsert($hostOrKey, 'test_collection', {}) ---- +.Example results +[opts="header"] +|=== +| result | time | status +| {"result": { "operation_id": 0, "status": "acknowledged" } } | 0.094182458 | "ok" +|=== .Get vectors (it leverages https://qdrant.github.io/qdrant/redoc/index.html#tag/points/operation/get_points[this API]) [source,cypher] @@ -202,8 +220,17 @@ so that we do not return the 
other values that we do not need. +which returns a string that answers the `$question` by leveraging the embeddings of the db vector. + .Delete vectors (it leverages https://qdrant.github.io/qdrant/redoc/index.html#tag/points/operation/delete_vectors[this API]) [source,cypher] ---- CALL apoc.vectordb.qdrant.delete($hostOrKey, 'test_collection', [1,2], {}) ---- + +.Example results +[opts="header"] +|=== +| result | time | status +| {"result": { "operation_id": 2, "status": "acknowledged" } } | 0.094182458 | "ok" +|=== \ No newline at end of file diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/weaviate.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/weaviate.adoc index be672294aa..f88f839be2 100644 --- a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/weaviate.adoc +++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/weaviate.adoc @@ -46,6 +46,13 @@ With hostOrKey=null, the default is 'http://localhost:8080/v1'. 
CALL apoc.vectordb.weaviate.createCollection($host, 'test_collection', 'Cosine', 4, {}) ---- +.Example results +[opts="header"] +|=== +| vectorizer | invertedIndexConfig | vectorIndexConfig | multiTenancyConfig | vectorIndexType | replicationConfig | shardingConfig | class | properties +| none | {"bm25": { "b": 0.75, "k1": 1.2 }, "stopwords": { "additions": null, "removals": null, "preset": "en" }, "cleanupIntervalSeconds": 60} | { "ef": -1, "dynamicEfMin": 100, "pq": { "centroids": 256, "trainingLimit": 100000, "encoder": { "type": "kmeans", "distribution": "log-normal" }, "enabled": false, "bitCompression": false, "segments": 0 }, "distance": "cosine", "skip": false, "dynamicEfFactor": 8, "bq": { "enabled": false }, "vectorCacheMaxObjects": 1000000000000, "cleanupIntervalSeconds": 300, "dynamicEfMax": 500, "efConstruction": 128, "flatSearchCutoff": 40000, "maxConnections": 64 } | { "enabled": false } | hnsw | { "factor": 1 } | { "desiredVirtualCount": 128, "desiredCount": 1, "actualCount": 1, "function": "murmur3", "virtualPerPhysical": 128, "strategy": "hash", "actualVirtualCount": 128, "key": "_id" } | TestCollection | null +|=== + .Create a collection against a remote connection using an API key (see https://weaviate.io/developers/weaviate/configuration/authentication[here]) [source,cypher] ---- @@ -56,6 +63,12 @@ CALL apoc.vectordb.weaviate.createCollection("https://.weavi {headers: {Authorization: 'Bearer '}}) ---- +.Example results +[opts="header"] +|=== +| vectorizer | invertedIndexConfig | vectorIndexConfig | multiTenancyConfig | vectorIndexType | replicationConfig | shardingConfig | class | properties +| none | {"bm25": { "b": 0.75, "k1": 1.2 }, "stopwords": { "additions": null, "removals": null, "preset": "en" }, "cleanupIntervalSeconds": 60} | { "ef": -1, "dynamicEfMin": 100, "pq": { "centroids": 256, "trainingLimit": 100000, "encoder": { "type": "kmeans", "distribution": "log-normal" }, "enabled": false, "bitCompression": false, "segments": 0 }, 
"distance": "cosine", "skip": false, "dynamicEfFactor": 8, "bq": { "enabled": false }, "vectorCacheMaxObjects": 1000000000000, "cleanupIntervalSeconds": 300, "dynamicEfMax": 500, "efConstruction": 128, "flatSearchCutoff": 40000, "maxConnections": 64 } | { "enabled": false } | hnsw | { "factor": 1 } | { "desiredVirtualCount": 128, "desiredCount": 1, "actualCount": 1, "function": "murmur3", "virtualPerPhysical": 128, "strategy": "hash", "actualVirtualCount": 128, "key": "_id" } | TestCollection | null +|=== .Delete a collection (it leverages https://weaviate.io/developers/weaviate/api/rest#tag/schema/delete/schema/{className}[this API]) @@ -64,6 +77,8 @@ CALL apoc.vectordb.weaviate.createCollection("https://.weavi CALL apoc.vectordb.weaviate.deleteCollection($host, 'test_collection', {}) ---- +which returns an empty result. + .Upsert vectors (it leverages https://weaviate.io/developers/weaviate/api/rest#tag/objects/post/objects[this API]) [source,cypher] @@ -76,6 +91,13 @@ CALL apoc.vectordb.weaviate.upsert($host, 'test_collection', {}) ---- +.Example results +[opts="header"] +|=== +| lastUpdateTimeUnix | vector | id | creationTimeUnix | class | properties +| 1721293838439 | [0.05, 0.61, 0.76, 0.74] | 8ef2b3a7-1e56-4ddd-b8c3-2ca8901ce308 | 1721293838439 | TestCollection | {city: "Berlin", foo: "one"} +| 1721293838439 | [0.19, 0.81, 0.75, 0.11] | 9ef2b3a7-1e56-4ddd-b8c3-2ca8901ce308 | 1721293838439 | TestCollection | {city: "London", foo: "two"} +|=== .Get vectors (it leverages https://weaviate.io/developers/weaviate/api/rest#tag/objects/get/objects/\{className\}/\{id\}[this API]) [source,cypher] @@ -215,8 +237,17 @@ so that we do not return the other values that we do not need. +which returns a string that answers the `$question` by leveraging the embeddings of the db vector. 
+ .Delete vectors (it leverages https://weaviate.io/developers/weaviate/api/rest#tag/objects/delete/objects/\{className\}/\{id\}[this API]) [source,cypher] ---- CALL apoc.vectordb.weaviate.delete($host, 'test_collection', [1,2], {}) ---- + +.Example results +[opts="header"] +|=== +| value +| ["1", "2"] +|=== \ No newline at end of file