diff --git a/poetry.lock b/poetry.lock index 8310e1ec1..25e0bbda1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2238,6 +2238,17 @@ files = [ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "jsonpath-python" +version = "1.0.6" +description = "A more powerful JSONPath implementation in modern python" +optional = true +python-versions = ">=3.6" +files = [ + {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, + {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, +] + [[package]] name = "kiwisolver" version = "1.4.5" @@ -2481,6 +2492,21 @@ huggingface-hub = {version = ">=0.19.0", extras = ["inference"]} llama-index-core = ">=0.10.1,<0.11.0" sentence-transformers = ">=2.6.1" +[[package]] +name = "llama-index-embeddings-mistralai" +version = "0.1.6" +description = "llama-index embeddings mistralai integration" +optional = true +python-versions = "<4.0,>=3.9" +files = [ + {file = "llama_index_embeddings_mistralai-0.1.6-py3-none-any.whl", hash = "sha256:d69d6fc0be8a1772aaf890bc036f2d575af46070b375a2649803c0eb9736ea1b"}, + {file = "llama_index_embeddings_mistralai-0.1.6.tar.gz", hash = "sha256:7c9cbf974b1e7d14ded34d3eb749a0d1a379fb151ab75115cc1ffdd08a96a045"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +mistralai = ">=1.0.0" + [[package]] name = "llama-index-embeddings-ollama" version = "0.1.2" @@ -3002,6 +3028,27 @@ files = [ {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"}, ] +[[package]] +name = "mistralai" +version = "1.0.3" +description = "Python Client SDK for the Mistral AI API." +optional = true +python-versions = "<4.0,>=3.8" +files = [ + {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"}, + {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" +jsonpath-python = ">=1.0.6,<2.0.0" +pydantic = ">=2.8.2,<2.9.0" +python-dateutil = ">=2.9.0.post0,<3.0.0" +typing-inspect = ">=0.9.0,<0.10.0" + +[package.extras] +gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"] + [[package]] name = "mmh3" version = "4.1.0" @@ -6692,6 +6739,7 @@ cffi = ["cffi (>=1.11)"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] @@ -6714,4 +6762,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519" +content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index 5d3e99749..b55cef873 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -144,6 +144,23 @@ def __init__(self, settings: Settings) -> None: api_key=settings.gemini.api_key, model_name=settings.gemini.embedding_model, ) + case "mistralai": + try: + from llama_index.embeddings.mistralai import ( # type: ignore + MistralAIEmbedding, + ) + except ImportError as e: + raise ImportError( + "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`" + ) from e + + api_key = settings.openai.api_key + model = settings.openai.embedding_model + + self.embedding_model = MistralAIEmbedding( + api_key=api_key, + model=model, + ) case "mock": # Not a random number, is the dimensionality used by # the default embedding model diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 4cf192a3e..9b4238dd5 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel): class EmbeddingSettings(BaseModel): mode: Literal[ - "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini" + "huggingface", + "openai", + "azopenai", + "sagemaker", + "ollama", + "mock", + "gemini", + "mistralai", ] ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field( "simple", diff --git a/pyproject.toml b/pyproject.toml index da9fab80a..afbb83ccf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true} llama-index-embeddings-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-gemini = {version ="^0.1.8", optional = true} +llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true} llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true} llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true} llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true} @@ -83,6 +84,7 @@ embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"] vector-stores-chroma = ["llama-index-vector-stores-chroma"]