diff --git a/.github/workflows/build-against-langchain4j.yml b/.github/workflows/build-against-langchain4j.yml
index 595637b50..3238920b2 100644
--- a/.github/workflows/build-against-langchain4j.yml
+++ b/.github/workflows/build-against-langchain4j.yml
@@ -13,6 +13,8 @@ jobs:
build:
name: Build on ${{ matrix.os }} - ${{ matrix.java }}
strategy:
+ # PineconeEmbeddingStoreTest uses a single shared index, so matrix jobs must not run against it concurrently
+ max-parallel: 1
fail-fast: false
matrix:
os: [ubuntu-latest]
@@ -41,6 +43,16 @@ jobs:
- name: Build with Maven
run: mvn -B clean install -Dno-format
+ env:
+ PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+ PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
+ PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
+ PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
- name: Build with Maven (Native)
run: mvn -B install -Dnative -Dquarkus.native.container-build -Dnative.surefire.skip
+ env:
+ PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+ PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
+ PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
+ PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d42ab99ec..b1f88e837 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -34,6 +34,9 @@ jobs:
build:
name: Build on ${{ matrix.os }} - ${{ matrix.java }}
strategy:
+ # PineconeEmbeddingStoreTest uses a single shared index, so matrix jobs must not run against it concurrently.
+ # The `secrets` context is not available inside `strategy`, so this cannot be made conditional on PINECONE_API_KEY.
+ max-parallel: 1
fail-fast: false
matrix:
os: [ubuntu-latest]
@@ -54,6 +57,16 @@ jobs:
- name: Build with Maven
run: mvn -B clean install -Dno-format
+ env: # note that secrets are not available when triggered by PR from a fork, so some tests will be skipped
+ PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+ PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
+ PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
+ PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
- name: Build with Maven (Native)
run: mvn -B install -Dnative -Dquarkus.native.container-build -Dnative.surefire.skip
+ env: # note that secrets are not available when triggered by PR from a fork, so some tests will be skipped
+ PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+ PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
+ PINECONE_INDEX_NAME: ${{ secrets.PINECONE_INDEX_NAME }}
+ PINECONE_PROJECT_ID: ${{ secrets.PINECONE_PROJECT_ID }}
diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
index 2966f3e16..a66b7ab0e 100644
--- a/docs/modules/ROOT/nav.adoc
+++ b/docs/modules/ROOT/nav.adoc
@@ -14,8 +14,9 @@
* Document Stores
** xref:redis-store.adoc[Redis Store]
** xref:chroma-store.adoc[Chroma Store]
+** xref:pinecone-store.adoc[Pinecone Store]
** xref:in-process-embedding.adoc[In-Process Embeddings]
** xref:csv.adoc[Loading CSV files]
* Advanced topics
-** xref:fault-tolerance.adoc[Fault Tolerance]
\ No newline at end of file
+** xref:fault-tolerance.adoc[Fault Tolerance]
diff --git a/docs/modules/ROOT/pages/includes/quarkus-langchain4j-pinecone.adoc b/docs/modules/ROOT/pages/includes/quarkus-langchain4j-pinecone.adoc
new file mode 100644
index 000000000..04197c7ed
--- /dev/null
+++ b/docs/modules/ROOT/pages/includes/quarkus-langchain4j-pinecone.adoc
@@ -0,0 +1,168 @@
+
+:summaryTableId: quarkus-langchain4j-pinecone
+[.configuration-legend]
+icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
+[.configuration-reference.searchable, cols="80,.^10,.^10"]
+|===
+
+h|[[quarkus-langchain4j-pinecone_configuration]]link:#quarkus-langchain4j-pinecone_configuration[Configuration property]
+
+h|Type
+h|Default
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.api-key]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.api-key[quarkus.langchain4j.pinecone.api-key]`
+
+
+[.description]
+--
+The API key to Pinecone.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_API_KEY+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_API_KEY+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|required icon:exclamation-circle[title=Configuration property is required]
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.environment]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.environment[quarkus.langchain4j.pinecone.environment]`
+
+
+[.description]
+--
+Environment name, e.g. gcp-starter or northamerica-northeast1-gcp.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_ENVIRONMENT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_ENVIRONMENT+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|required icon:exclamation-circle[title=Configuration property is required]
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.project-id]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.project-id[quarkus.langchain4j.pinecone.project-id]`
+
+
+[.description]
+--
+ID of the project.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_PROJECT_ID+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_PROJECT_ID+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|required icon:exclamation-circle[title=Configuration property is required]
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.index-name]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.index-name[quarkus.langchain4j.pinecone.index-name]`
+
+
+[.description]
+--
+Name of the index within the project. If the index doesn't exist, it will be created.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_INDEX_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_INDEX_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|required icon:exclamation-circle[title=Configuration property is required]
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.dimension]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.dimension[quarkus.langchain4j.pinecone.dimension]`
+
+
+[.description]
+--
+Dimension of the embeddings in the index. This is required only if the index doesn't exist yet and needs to be created.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_DIMENSION+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_DIMENSION+++`
+endif::add-copy-button-to-env-var[]
+--|int
+|
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.namespace]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.namespace[quarkus.langchain4j.pinecone.namespace]`
+
+
+[.description]
+--
+The namespace.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_NAMESPACE+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_NAMESPACE+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.text-field-name]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.text-field-name[quarkus.langchain4j.pinecone.text-field-name]`
+
+
+[.description]
+--
+The name of the field that contains the text segment.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_TEXT_FIELD_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_TEXT_FIELD_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|`text`
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.timeout]]`link:#quarkus-langchain4j-pinecone_quarkus.langchain4j.pinecone.timeout[quarkus.langchain4j.pinecone.timeout]`
+
+
+[.description]
+--
+The timeout duration for the Pinecone client. If not specified, 5 seconds will be used.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PINECONE_TIMEOUT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_PINECONE_TIMEOUT+++`
+endif::add-copy-button-to-env-var[]
+--|link:https://docs.oracle.com/javase/8/docs/api/java/time/Duration.html[Duration]
+ link:#duration-note-anchor-{summaryTableId}[icon:question-circle[], title=More information about the Duration format]
+|
+
+|===
+ifndef::no-duration-note[]
+[NOTE]
+[id='duration-note-anchor-{summaryTableId}']
+.About the Duration format
+====
+To write duration values, use the standard `java.time.Duration` format.
+See the link:https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/Duration.html#parse(java.lang.CharSequence)[Duration#parse() javadoc] for more information.
+
+You can also use a simplified format, starting with a number:
+
+* If the value is only a number, it represents time in seconds.
+* If the value is a number followed by `ms`, it represents time in milliseconds.
+
+In other cases, the simplified format is translated to the `java.time.Duration` format for parsing:
+
+* If the value is a number followed by `h`, `m`, or `s`, it is prefixed with `PT`.
+* If the value is a number followed by `d`, it is prefixed with `P`.
+====
+endif::no-duration-note[]
diff --git a/docs/modules/ROOT/pages/pinecone-store.adoc b/docs/modules/ROOT/pages/pinecone-store.adoc
new file mode 100644
index 000000000..33aa24759
--- /dev/null
+++ b/docs/modules/ROOT/pages/pinecone-store.adoc
@@ -0,0 +1,23 @@
+= Pinecone Store for Retrieval Augmented Generation (RAG)
+
+include::./includes/attributes.adoc[]
+
+When implementing Retrieval Augmented Generation (RAG), a robust document store is crucial. This guide demonstrates how to leverage a https://www.pinecone.io/[Pinecone] database as the document store.
+
+== Leveraging the Pinecone Document Store
+
+To make use of the Pinecone document store, you'll need to include the following dependency:
+
+[source,xml,subs=attributes+]
+----
+<dependency>
+    <groupId>io.quarkiverse.langchain4j</groupId>
+    <artifactId>quarkus-langchain4j-pinecone</artifactId>
+</dependency>
+----
+
+== Configuration Settings
+
+Customize the behavior of the extension by exploring various configuration options:
+
+include::includes/quarkus-langchain4j-pinecone.adoc[leveloffset=+1,opts=optional]
diff --git a/docs/pom.xml b/docs/pom.xml
index 048f3a5ef..c9f6debda 100644
--- a/docs/pom.xml
+++ b/docs/pom.xml
@@ -37,6 +37,11 @@
+
+
+
+
+
@@ -57,6 +62,11 @@
+
+
+
+
+
@@ -112,6 +122,7 @@
quarkus-langchain4j-huggingface.adocquarkus-langchain4j-redis.adocquarkus-langchain4j-chroma.adoc
+ quarkus-langchain4j-pinecone.adocfalse
diff --git a/docs/src/main/resources/application.properties b/docs/src/main/resources/application.properties
index 74eea3b87..e2275a79f 100644
--- a/docs/src/main/resources/application.properties
+++ b/docs/src/main/resources/application.properties
@@ -1,2 +1,6 @@
# Just there to satisfy mandatory properties
-quarkus.langchain4j.redis.dimension=180
\ No newline at end of file
+quarkus.langchain4j.redis.dimension=180
+quarkus.langchain4j.pinecone.environment=abc
+quarkus.langchain4j.pinecone.index-name=abc
+quarkus.langchain4j.pinecone.project-id=abc
+quarkus.langchain4j.pinecone.api-key=abc
\ No newline at end of file
diff --git a/pinecone/deployment/pom.xml b/pinecone/deployment/pom.xml
new file mode 100644
index 000000000..73b6bb340
--- /dev/null
+++ b/pinecone/deployment/pom.xml
@@ -0,0 +1,73 @@
+
+
+ 4.0.0
+
+ io.quarkiverse.langchain4j
+ quarkus-langchain4j-pinecone-parent
+ 999-SNAPSHOT
+
+ quarkus-langchain4j-pinecone-deployment
+ Quarkus Langchain4j - Pinecone embedding store - Deployment
+
+
+ io.quarkus
+ quarkus-arc-deployment
+
+
+ io.quarkus
+ quarkus-rest-client-reactive-jackson-deployment
+
+
+ io.quarkiverse.langchain4j
+ quarkus-langchain4j-core-deployment
+ ${project.version}
+
+
+ io.quarkiverse.langchain4j
+ quarkus-langchain4j-pinecone
+ ${project.version}
+
+
+ io.quarkus
+ quarkus-junit5-internal
+ test
+
+
+ org.assertj
+ assertj-core
+ ${assertj.version}
+ test
+
+
+ org.wiremock
+ wiremock-standalone
+ ${wiremock.version}
+ test
+
+
+ dev.langchain4j
+ langchain4j-embeddings-all-minilm-l6-v2-q
+ ${langchain4j.version}
+ test
+
+
+
+
+
+ maven-compiler-plugin
+
+
+
+ io.quarkus
+ quarkus-extension-processor
+ ${quarkus.version}
+
+
+
+
+
+
+
+
diff --git a/pinecone/deployment/src/main/java/io/quarkiverse/langchain4j/pinecone/PineconeProcessor.java b/pinecone/deployment/src/main/java/io/quarkiverse/langchain4j/pinecone/PineconeProcessor.java
new file mode 100644
index 000000000..660eaa9ba
--- /dev/null
+++ b/pinecone/deployment/src/main/java/io/quarkiverse/langchain4j/pinecone/PineconeProcessor.java
@@ -0,0 +1,44 @@
+package io.quarkiverse.langchain4j.pinecone;
+
+import jakarta.enterprise.context.ApplicationScoped;
+
+import org.jboss.jandex.DotName;
+
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import io.quarkiverse.langchain4j.pinecone.runtime.PineconeConfig;
+import io.quarkiverse.langchain4j.pinecone.runtime.PineconeRecorder;
+import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
+import io.quarkus.deployment.annotations.BuildProducer;
+import io.quarkus.deployment.annotations.BuildStep;
+import io.quarkus.deployment.annotations.ExecutionTime;
+import io.quarkus.deployment.annotations.Record;
+import io.quarkus.deployment.builditem.FeatureBuildItem;
+
+/** Build steps that register the Pinecone feature and a synthetic {@link EmbeddingStore} bean. */
+public class PineconeProcessor {
+
+    public static final DotName PINECONE_EMBEDDING_STORE = DotName.createSimple(PineconeEmbeddingStore.class);
+    private static final String FEATURE = "langchain4j-pinecone";
+
+    /** Produces the {@link FeatureBuildItem} named {@code langchain4j-pinecone}. */
+    @BuildStep
+    FeatureBuildItem feature() {
+        return new FeatureBuildItem(FEATURE);
+    }
+
+    /** Produces an application-scoped, runtime-initialized default bean supplied by {@code recorder}. */
+    @BuildStep
+    @Record(ExecutionTime.RUNTIME_INIT)
+    public void createBean(BuildProducer<SyntheticBeanBuildItem> beanProducer,
+            PineconeRecorder recorder, PineconeConfig config) {
+        beanProducer.produce(SyntheticBeanBuildItem
+                .configure(PINECONE_EMBEDDING_STORE)
+                .types(EmbeddingStore.class)
+                // Registered as a default bean; the flag only needs to be set once in the chain.
+                .defaultBean()
+                .setRuntimeInit()
+                .scope(ApplicationScoped.class)
+                .supplier(recorder.pineconeStoreSupplier(config))
+                .done());
+    }
+}
diff --git a/pinecone/deployment/src/test/java/io/quarkiverse/langchain4j/pinecone/deployment/PineconeEmbeddingStoreTest.java b/pinecone/deployment/src/test/java/io/quarkiverse/langchain4j/pinecone/deployment/PineconeEmbeddingStoreTest.java
new file mode 100644
index 000000000..b6ef2d2f7
--- /dev/null
+++ b/pinecone/deployment/src/test/java/io/quarkiverse/langchain4j/pinecone/deployment/PineconeEmbeddingStoreTest.java
@@ -0,0 +1,298 @@
+package io.quarkiverse.langchain4j.pinecone.deployment;
+
+import static dev.langchain4j.internal.Utils.randomUUID;
+import static java.util.Arrays.asList;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.data.Percentage.withPercentage;
+
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import jakarta.inject.Inject;
+
+import org.jboss.shrinkwrap.api.ShrinkWrap;
+import org.jboss.shrinkwrap.api.asset.StringAsset;
+import org.jboss.shrinkwrap.api.spec.JavaArchive;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+import dev.langchain4j.data.document.Metadata;
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.store.embedding.CosineSimilarity;
+import dev.langchain4j.store.embedding.EmbeddingMatch;
+import dev.langchain4j.store.embedding.RelevanceScore;
+import io.quarkiverse.langchain4j.pinecone.PineconeEmbeddingStore;
+import io.quarkiverse.langchain4j.pinecone.runtime.DeleteRequest;
+import io.quarkiverse.langchain4j.pinecone.runtime.PineconeVectorOperationsApi;
+import io.quarkiverse.langchain4j.pinecone.runtime.QueryRequest;
+import io.quarkiverse.langchain4j.pinecone.runtime.VectorMatch;
+import io.quarkus.logging.Log;
+import io.quarkus.test.QuarkusUnitTest;
+
+/**
+ * Prerequisites for this test: A pinecone index must exist (can be in the starter region)
+ * and the following environment variables must be set accordingly:
+ * PINECONE_API_KEY, PINECONE_ENVIRONMENT, PINECONE_PROJECT_ID and PINECONE_INDEX_NAME
+ *
+ * These are set as GitHub secrets in the main repository. GitHub doesn't
+ * pass them to workflows triggered from forks though, so this test only
+ * runs with the nightly CI workflow, or for PRs submitted from the main
+ * quarkiverse repository (NOT from a fork).
+ *
+ *
+ * Original data in the index will be lost during the test.
+ *