Skip to content

Commit

Permalink
Pinecone embedding store
Browse files Browse the repository at this point in the history
  • Loading branch information
jmartisk committed Nov 24, 2023
1 parent 71e181a commit fca573b
Show file tree
Hide file tree
Showing 29 changed files with 1,428 additions and 2 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/build-against-langchain4j.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ jobs:
- name: Build with Maven
run: mvn -B clean install -Dno-format
env:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}

- name: Build with Maven (Native)
run: mvn -B install -Dnative -Dquarkus.native.container-build -Dnative.surefire.skip
env:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
10 changes: 10 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ jobs:

- name: Build with Maven
run: mvn -B clean install -Dno-format
env: # note that secrets are not available when triggered by PR from a fork
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}

- name: Build with Maven (Native)
run: mvn -B install -Dnative -Dquarkus.native.container-build -Dnative.surefire.skip
env: # note that secrets are not available when triggered by PR from a fork
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
3 changes: 2 additions & 1 deletion docs/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
* Document Stores
** xref:redis-store.adoc[Redis Store]
** xref:chroma-store.adoc[Chroma Store]
** xref:pinecone-store.adoc[Pinecone Store]
** xref:in-process-embedding.adoc[In-Process Embeddings]
* Advanced topics
** xref:fault-tolerance.adoc[Fault Tolerance]
** xref:fault-tolerance.adoc[Fault Tolerance]
168 changes: 168 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j-pinecone.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@

:summaryTableId: quarkus-langchain4j-pinecone
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-pinecone_configuration]]link:#quarkus-langchain4j-pinecone_configuration[Configuration property]

h|Type
h|Default

a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.api-key]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.api-key[quarkus.langchain4j.pinecone.api-key]`


[.description]
--
The API key to Pinecone.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_API_KEY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_API_KEY+++`
endif::add-copy-button-to-env-var[]
--|string
|required icon:exclamation-circle[title=Configuration property is required]


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.environment]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.environment[quarkus.langchain4j.pinecone.environment]`


[.description]
--
Environment name, e.g. gcp-starter or northamerica-northeast1-gcp.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_ENVIRONMENT+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_ENVIRONMENT+++`
endif::add-copy-button-to-env-var[]
--|string
|required icon:exclamation-circle[title=Configuration property is required]


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.project-id]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.project-id[quarkus.langchain4j.pinecone.project-id]`


[.description]
--
ID of the project.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_PROJECT_ID+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_PROJECT_ID+++`
endif::add-copy-button-to-env-var[]
--|string
|required icon:exclamation-circle[title=Configuration property is required]


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.index-name]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.index-name[quarkus.langchain4j.pinecone.index-name]`


[.description]
--
Name of the index within the project. If the index doesn't exist, it will be created.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_INDEX_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_INDEX_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|required icon:exclamation-circle[title=Configuration property is required]


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.dimension]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.dimension[quarkus.langchain4j.pinecone.dimension]`


[.description]
--
Dimension of the embeddings in the index. This is required only if the index doesn't exist yet and needs to be created.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_DIMENSION+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_DIMENSION+++`
endif::add-copy-button-to-env-var[]
--|int
|


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.namespace]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.namespace[quarkus.langchain4j.pinecone.namespace]`


[.description]
--
The namespace.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_NAMESPACE+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_NAMESPACE+++`
endif::add-copy-button-to-env-var[]
--|string
|


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.text-field-name]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.text-field-name[quarkus.langchain4j.pinecone.text-field-name]`


[.description]
--
The name of the field that contains the text segment.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_TEXT_FIELD_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_TEXT_FIELD_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|`text`


a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.timeout]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.timeout[quarkus.langchain4j.pinecone.timeout]`


[.description]
--
The timeout duration for the Pinecone client. If not specified, 5 seconds will be used.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_TIMEOUT+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_TIMEOUT+++`
endif::add-copy-button-to-env-var[]
--|link:https://docs.oracle.com/javase/8/docs/api/java/time/Duration.html[Duration]
link:#duration-note-anchor-{summaryTableId}[icon:question-circle[], title=More information about the Duration format]
|

|===
ifndef::no-duration-note[]
[NOTE]
[id='duration-note-anchor-{summaryTableId}']
.About the Duration format
====
To write duration values, use the standard `java.time.Duration` format.
See the link:https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/Duration.html#parse(java.lang.CharSequence)[Duration#parse() javadoc] for more information.

You can also use a simplified format, starting with a number:

* If the value is only a number, it represents time in seconds.
* If the value is a number followed by `ms`, it represents time in milliseconds.

In other cases, the simplified format is translated to the `java.time.Duration` format for parsing:

* If the value is a number followed by `h`, `m`, or `s`, it is prefixed with `PT`.
* If the value is a number followed by `d`, it is prefixed with `P`.
====
endif::no-duration-note[]
23 changes: 23 additions & 0 deletions docs/modules/ROOT/pages/pinecone-store.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
= Pinecone Store for Retrieval Augmented Generation (RAG)

include::./includes/attributes.adoc[]

When implementing Retrieval Augmented Generation (RAG), a robust document store is crucial. This guide demonstrates how to leverage a https://www.pinecone.io/[Pinecone] database as the document store.

== Leveraging the Pinecone Document Store

To make use of the Pinecone document store, you'll need to include the following dependency:

[source,xml,subs=attributes+]
----
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pinecone</artifactId>
</dependency>
----

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:

include::includes/quarkus-langchain4j-pinecone.adoc[leveloffset=+1,opts=optional]
11 changes: 11 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
<!-- <artifactId>quarkus-langchain4j-chroma</artifactId>-->
<!-- <version>${project.version}</version>-->
<!-- </dependency>-->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pinecone</artifactId>
<version>${project.version}</version>
</dependency>

<!-- Make sure the doc is built after the other artifacts -->
<dependency>
Expand All @@ -55,6 +60,11 @@
<!-- <artifactId>quarkus-langchain4j-chroma-deployment</artifactId>-->
<!-- <version>${project.version}</version>-->
<!-- </dependency>-->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pinecone-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>io.quarkiverse.langchain4j</groupId>-->
<!-- <artifactId>quarkus-langchain4j-hugging-face-deployment</artifactId>-->
Expand Down Expand Up @@ -112,6 +122,7 @@
<include>quarkus-langchain4j-huggingface.adoc</include>
<include>quarkus-langchain4j-redis.adoc</include>
<include>quarkus-langchain4j-chroma.adoc</include>
<include>quarkus-langchain4j-pinecone.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
6 changes: 5 additions & 1 deletion docs/src/main/resources/application.properties
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Present only to satisfy mandatory configuration properties
quarkus.langchain4j.redis.dimension=180
quarkus.langchain4j.redis.dimension=180
quarkus.langchain4j.pinecone.environment=abc
quarkus.langchain4j.pinecone.index-name=abc
quarkus.langchain4j.pinecone.project-id=abc
quarkus.langchain4j.pinecone.api-key=abc
73 changes: 73 additions & 0 deletions pinecone/deployment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pinecone-parent</artifactId>
<version>999-SNAPSHOT</version>
</parent>
<artifactId>quarkus-langchain4j-pinecone-deployment</artifactId>
<name>Quarkus Langchain4j - Pinecone embedding store - Deployment</name>
<dependencies>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-rest-client-reactive-jackson-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-core-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pinecone</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5-internal</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
<version>${wiremock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>${langchain4j.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-extension-processor</artifactId>
<version>${quarkus.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package io.quarkiverse.langchain4j.pinecone;

import jakarta.enterprise.context.ApplicationScoped;

import org.jboss.jandex.DotName;

import dev.langchain4j.store.embedding.EmbeddingStore;
import io.quarkiverse.langchain4j.pinecone.runtime.PineconeConfig;
import io.quarkiverse.langchain4j.pinecone.runtime.PineconeRecorder;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;

/**
 * Quarkus deployment processor for the Pinecone embedding store extension.
 *
 * <p>
 * Contributes two build steps: one that registers the extension's feature name, and one
 * that registers a synthetic CDI bean exposing the Pinecone store as an
 * {@link EmbeddingStore}.
 */
public class PineconeProcessor {

    /** Jandex name of the implementation class used to configure the synthetic bean. */
    public static final DotName PINECONE_EMBEDDING_STORE = DotName.createSimple(PineconeEmbeddingStore.class);

    /** Feature name reported through {@link FeatureBuildItem}. */
    private static final String FEATURE = "langchain4j-pinecone";

    /**
     * Registers the extension's feature.
     *
     * @return a {@link FeatureBuildItem} carrying the {@code langchain4j-pinecone} feature name
     */
    @BuildStep
    FeatureBuildItem feature() {
        return new FeatureBuildItem(FEATURE);
    }

    /**
     * Produces a synthetic, application-scoped bean of type {@link EmbeddingStore} whose
     * instance is obtained from the supplier returned by the recorder.
     *
     * <p>
     * The bean is marked as a default bean and as runtime-initialized, matching the
     * {@link ExecutionTime#RUNTIME_INIT} recording phase of this build step.
     *
     * @param beanProducer sink for the synthetic bean definition
     * @param recorder recorder that provides the store supplier
     * @param config Pinecone configuration handed to the recorder
     */
    @BuildStep
    @Record(ExecutionTime.RUNTIME_INIT)
    public void createBean(
            BuildProducer<SyntheticBeanBuildItem> beanProducer,
            PineconeRecorder recorder,
            PineconeConfig config) {
        beanProducer.produce(SyntheticBeanBuildItem
                .configure(PINECONE_EMBEDDING_STORE)
                .types(EmbeddingStore.class)
                .setRuntimeInit()
                // Declared once; the original chain invoked defaultBean() twice.
                .defaultBean()
                .scope(ApplicationScoped.class)
                .supplier(recorder.pineconeStoreSupplier(config))
                .done());
    }

}
Loading

0 comments on commit fca573b

Please sign in to comment.