Merge branch 'main' into main

datastaxdevs · Jul 2, 2024 · e70dfd7 · e70dfd7
2 parents d26d896 + 25440a2
commit e70dfd7
Show file tree

Hide file tree

Showing 100 changed files with 3,193 additions and 374 deletions.
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -31,11 +31,11 @@ jobs:
         java_version: [8, 11, 17, 21]
         include:
           - java_version: '8'
-            included_modules: '-pl !code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-cassandra,!langchain4j-astradb,!langchain4j-infinispan,!langchain4j-neo4j,!langchain4j-opensearch,!langchain4j-azure-ai-search'
+            included_modules: '-pl !langchain4j-jlama,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-cassandra,!langchain4j-astradb,!langchain4j-infinispan,!langchain4j-neo4j,!langchain4j-opensearch,!langchain4j-azure-ai-search'
           - java_version: '11'
-            included_modules: '-pl !code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-infinispan,!langchain4j-neo4j'
+            included_modules: '-pl !langchain4j-jlama,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-infinispan,!langchain4j-neo4j'
           - java_version: '17'
-            included_modules: '-pl !code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot'
+            included_modules: '-pl !langchain4j-jlama,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot'
           - java_version: '21'
             included_modules: ''
     runs-on: ubuntu-latest

diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml
@@ -13,11 +13,11 @@ jobs:
         java_version: [ 8, 11, 17, 21 ]
         include:
           - java_version: '8'
-            included_modules: '-pl !langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-cassandra,!langchain4j-infinispan,!langchain4j-neo4j,!langchain4j-opensearch,!langchain4j-azure-ai-search'
+            included_modules: '-pl !langchain4j-jlama,!langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-cassandra,!langchain4j-infinispan,!langchain4j-neo4j,!langchain4j-opensearch,!langchain4j-azure-ai-search'
           - java_version: '11'
-            included_modules: '-pl !langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-infinispan,!langchain4j-neo4j'
+            included_modules: '-pl !langchain4j-jlama,!langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot,!langchain4j-infinispan,!langchain4j-neo4j'
           - java_version: '17'
-            included_modules: '-pl !langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot'
+            included_modules: '-pl !langchain4j-jlama,!langchain4j-local-ai,!langchain4j-milvus,!code-execution-engines/langchain4j-code-execution-engine-graalvm-polyglot'
           - java_version: '21'
             included_modules: '-pl !langchain4j-local-ai,!langchain4j-milvus'
     runs-on: ubuntu-latest

diff --git a/docs/docs/integrations/embedding-models/1-in-process.md b/docs/docs/integrations/embedding-models/1-in-process.md
@@ -4,6 +4,25 @@ sidebar_position: 1
 
 # In-process (ONNX)
 
-All in-process embedding models can be found [here](https://github.com/langchain4j/langchain4j-embeddings).
+LangChain4j provides a few popular local embedding models packaged as maven dependencies.
+They are powered by [ONNX runtime](https://onnxruntime.ai/docs/get-started/with-java.html)
+and run in the same Java process.
 
-TODO
+Each model is provided in 2 flavours: original and quantized (has a `-q` suffix in maven artifact name and `Quantized` in the class name).
+
+The complete list of all embedding models can be found [here](https://github.com/langchain4j/langchain4j-embeddings).
+
+
+## Custom models
+
+Many models (e.g., from [Hugging Face](https://huggingface.co/)) can be used,
+as long as they are in the ONNX format.
+
+Information on how to convert models into ONNX format can be found [here](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model).
+
+Many models already converted to ONNX format are available [here](https://huggingface.co/Xenova).
+
+
+## Examples
+
+- [InProcessEmbeddingModelExamples](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/model/InProcessEmbeddingModelExamples.java)
diff --git a/docs/docs/integrations/embedding-models/amazon-bedrock.md b/docs/docs/integrations/embedding-models/amazon-bedrock.md
@@ -2,4 +2,24 @@
 sidebar_position: 2
 ---
 
-# Amazon Bedrock
+# Amazon Bedrock
+
+
+## Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-bedrock</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `BedrockTitanEmbeddingModel`
+
+
+## Examples
+
+- [BedrockEmbeddingIT](https://github.com/langchain4j/langchain4j/blob/main/langchain4j-bedrock/src/test/java/dev/langchain4j/model/bedrock/BedrockEmbeddingIT.java)
diff --git a/docs/docs/integrations/embedding-models/google-vertex-ai.md b/docs/docs/integrations/embedding-models/google-vertex-ai.md
@@ -17,14 +17,14 @@ Add the following dependencies to your project's `pom.xml`:
 <dependency>
   <groupId>dev.langchain4j</groupId>
   <artifactId>langchain4j-vertex-ai</artifactId>
-  <version>{your-version}</version> <!-- Specify langchain4j version here -->
+  <version>0.31.0</version>
 </dependency>
 ```
 
 or project's `build.gradle`:
 
 ```groovy
-implementation 'dev.langchain4j:langchain4j-vertex-ai:{your-version}'
+implementation 'dev.langchain4j:langchain4j-vertex-ai:0.31.0'
 ```
 
 ### Try out an example code:

diff --git a/docs/docs/integrations/embedding-models/jlama.md b/docs/docs/integrations/embedding-models/jlama.md
@@ -0,0 +1,111 @@
+---
+sidebar_position: 8
+---
+
+# Jlama
+[Jlama Project](https://github.com/tjake/Jlama)
+
+### Project setup
+
+To add LangChain4j to your project, add the following dependencies:
+
+For Maven project `pom.xml`
+
+```xml
+
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j</artifactId>
+    <version>{your_version}</version>
+</dependency>
+
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-jlama</artifactId>
+    <version>{your_version}</version>
+</dependency>
+
+<dependency>
+    <groupId>com.github.tjake</groupId>
+    <artifactId>jlama-native</artifactId>
+    <!-- for faster inference. supports linux-x86_64, macos-x86_64/aarch_64, windows-x86_64 
+        Use https://github.com/trustin/os-maven-plugin to detect os and arch -->
+    <classifier>${os.detected.name}-${os.detected.arch}</classifier>
+    <version>${jlama.version}</version> <!-- Version from langchain4j-jlama pom -->
+</dependency>
+```
+
+For Gradle project `build.gradle`
+
+```groovy
+implementation 'dev.langchain4j:langchain4j:0.31.0'
+implementation 'dev.langchain4j:langchain4j-jlama:0.31.0'
+```
+
+## Embedding
+The Jlama Embeddings model allows you to embed sentences, and using it in your application is simple. 
+We provide a simple example to get you started with Jlama Embeddings model integration.
+You can use any `bert` based model from [HuggingFace](https://huggingface.co/models?library=safetensors&sort=trending), and specify them using the `owner/model-name` format.
+
+Create a class and add the following code.
+
+```java
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.jlama.JlamaEmbeddingModel;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.store.embedding.EmbeddingMatch;
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
+
+import java.util.List;
+
+public class HelloWorld {
+    public static void main(String[] args) {
+        EmbeddingModel embeddingModel = JlamaEmbeddingModel.builder()
+                                        .modelName("intfloat/e5-small-v2")
+                                        .build();
+
+        // For simplicity, this example uses an in-memory store, but you can choose any external compatible store for production environments.
+        EmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
+
+        TextSegment segment1 = TextSegment.from("I like football.");
+        Embedding embedding1 = embeddingModel.embed(segment1).content();
+        embeddingStore.add(embedding1, segment1);
+
+        TextSegment segment2 = TextSegment.from("The weather is good today.");
+        Embedding embedding2 = embeddingModel.embed(segment2).content();
+        embeddingStore.add(embedding2, segment2);
+
+        String userQuery = "What is your favourite sport?";
+        Embedding queryEmbedding = embeddingModel.embed(userQuery).content();
+        int maxResults = 1;
+        List<EmbeddingMatch<TextSegment>> relevant = embeddingStore.findRelevant(queryEmbedding, maxResults);
+        EmbeddingMatch<TextSegment> embeddingMatch = relevant.get(0);
+
+        System.out.println("Question: " + userQuery); // What is your favourite sport?
+        System.out.println("Response: " + embeddingMatch.embedded().text()); // I like football.
+    }
+}
+```
+For this example, we'll add 2 text segments, but LangChain4j offers built-in support for loading documents from various sources:
+File System, URL, Amazon S3, Azure Blob Storage, GitHub, Tencent COS.
+Additionally, LangChain4j supports parsing multiple document types:
+text, pdf, doc, xls, ppt.
+
+The output will be similar to this:
+
+```plaintext
+Question: What is your favourite sport?
+Response: I like football.
+```
+
+Of course, you can combine Jlama Embeddings with RAG (Retrieval-Augmented Generation) techniques.
+
+In [RAG](/tutorials/rag) you will learn how to use RAG techniques for ingestion, retrieval and Advanced Retrieval with LangChain4j.
+
+A lot of parameters are set behind the scenes, such as timeout, model type and model parameters.
+In [Set Model Parameters](/tutorials/model-parameters) you will learn how to set these parameters explicitly.
+
+### More examples
+If you want to check more examples, you can find them in the [langchain4j-examples](https://github.com/langchain4j/langchain4j-examples) project.
diff --git a/docs/docs/integrations/embedding-models/mistral-ai.md b/docs/docs/integrations/embedding-models/mistral-ai.md
@@ -16,21 +16,21 @@ For Maven project `pom.xml`
 <dependency>
     <groupId>dev.langchain4j</groupId>
     <artifactId>langchain4j</artifactId>
-    <version>{your-version}</version> <!-- Specify your version here -->
+    <version>0.31.0</version>
 </dependency>
 
 <dependency>
     <groupId>dev.langchain4j</groupId>
     <artifactId>langchain4j-mistral-ai</artifactId>
-    <version>{your-version}</version>
+    <version>0.31.0</version>
 </dependency>
 ```
 
 For Gradle project `build.gradle`
 
 ```groovy
-implementation 'dev.langchain4j:langchain4j:{your-version}'
-implementation 'dev.langchain4j:langchain4j-mistral-ai:{your-version}'
+implementation 'dev.langchain4j:langchain4j:0.31.0'
+implementation 'dev.langchain4j:langchain4j-mistral-ai:0.31.0'
 ```
 #### API Key setup
 Add your MistralAI API key to your project, you can create a class ```ApiKeys.java``` with the following code

diff --git a/docs/docs/integrations/embedding-stores/1-in-memory.md b/docs/docs/integrations/embedding-stores/1-in-memory.md
@@ -4,6 +4,43 @@ sidebar_position: 1
 
 # In-memory
 
-`InMemoryEmbeddingStore`
+LangChain4j provides a simple in-memory implementation of an `EmbeddingStore` interface:
+`InMemoryEmbeddingStore`.
+It is useful for fast prototyping and simple use cases.
+It keeps `Embedding`s and associated `TextSegment`s in memory.
+Search is also performed in memory.
+It can also be persisted and restored to/from a JSON string or a file.
 
-Tutorial coming soon
+### Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `InMemoryEmbeddingStore` 
+
+
+## Persisting
+
+`InMemoryEmbeddingStore` can be serialized to a JSON string or to a file:
+```java
+InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
+embeddingStore.addAll(embeddings, embedded);
+
+String serializedStore = embeddingStore.serializeToJson();
+InMemoryEmbeddingStore<TextSegment> deserializedStore = InMemoryEmbeddingStore.fromJson(serializedStore);
+
+String filePath = "/home/me/store.json";
+embeddingStore.serializeToFile(filePath);
+InMemoryEmbeddingStore<TextSegment> deserializedStoreFromFile = InMemoryEmbeddingStore.fromFile(filePath);
+```
+
+## Examples
+
+- [InMemoryEmbeddingStoreExample](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/store/InMemoryEmbeddingStoreExample.java)
diff --git a/docs/docs/integrations/embedding-stores/azure-ai-search.md b/docs/docs/integrations/embedding-stores/azure-ai-search.md
@@ -4,7 +4,24 @@ sidebar_position: 3
 
 # Azure AI Search
 
-- `AzureAiSearchEmbeddingStore`
-- `AzureAiSearchContentRetriever`
 
-Tutorial coming soon
+## Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-azure-ai-search</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `AzureAiSearchEmbeddingStore` - supports vector search
+- `AzureAiSearchContentRetriever` - supports vector, full-text, hybrid searches and re-ranking
+
+
+## Examples
+
+- [AzureAiSearchEmbeddingStoreIT](https://github.com/langchain4j/langchain4j/blob/main/langchain4j-azure-ai-search/src/test/java/dev/langchain4j/store/embedding/azure/search/AzureAiSearchEmbeddingStoreIT.java)
+- [AzureAiSearchContentRetrieverIT](https://github.com/langchain4j/langchain4j/blob/main/langchain4j-azure-ai-search/src/test/java/dev/langchain4j/rag/content/retriever/azure/search/AzureAiSearchContentRetrieverIT.java)
diff --git a/docs/docs/integrations/embedding-stores/azure-cosmos-mongo-vcore.md b/docs/docs/integrations/embedding-stores/azure-cosmos-mongo-vcore.md
@@ -4,4 +4,21 @@ sidebar_position: 4
 
 # Azure CosmosDB Mongo vCore
 
-Tutorial coming soon
+## Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-azure-cosmos-mongo-vcore</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `AzureCosmosDbMongoVCoreEmbeddingStore`
+
+
+## Examples
+
+- [AzureCosmosDBMongoVCoreEmbeddingStoreIT](https://github.com/langchain4j/langchain4j/blob/main/langchain4j-azure-cosmos-mongo-vcore/src/test/java/dev/langchain4j/store/embedding/azure/cosmos/mongo/vcore/AzureCosmosDBMongoVCoreEmbeddingStoreIT.java)
diff --git a/docs/docs/integrations/embedding-stores/azure-cosmos-nosql.md b/docs/docs/integrations/embedding-stores/azure-cosmos-nosql.md
@@ -0,0 +1,24 @@
+---
+sidebar_position: 4
+---
+
+# Azure CosmosDB NoSQL
+
+## Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-azure-cosmos-nosql</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `AzureCosmosDbNoSqlEmbeddingStore`
+
+
+## Examples
+
+- [AzureCosmosDbNoSqlEmbeddingStoreIT](https://github.com/langchain4j/langchain4j/blob/main/langchain4j-azure-cosmos-nosql/src/test/java/dev/langchain4j/store/embedding/azure/cosmos/nosql/AzureCosmosDbNoSqlEmbeddingStoreIT.java)
diff --git a/docs/docs/integrations/embedding-stores/chroma.md b/docs/docs/integrations/embedding-stores/chroma.md
@@ -4,7 +4,24 @@ sidebar_position: 6
 
 # Chroma
 
-[Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/chroma-example/src/main/java/ChromaEmbeddingStoreExample.java)
-with [Chroma](https://www.trychroma.com/)
+https://www.trychroma.com/
 
-Tutorial coming soon
+
+## Maven Dependency
+
+```xml
+<dependency>
+    <groupId>dev.langchain4j</groupId>
+    <artifactId>langchain4j-chroma</artifactId>
+    <version>0.31.0</version>
+</dependency>
+```
+
+## APIs
+
+- `ChromaEmbeddingStore`
+
+
+## Examples
+
+- [ChromaEmbeddingStoreExample](https://github.com/langchain4j/langchain4j-examples/blob/main/chroma-example/src/main/java/ChromaEmbeddingStoreExample.java)