This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

refactor AutoVectorStore and move methods to db_utils #11

Merged · 9 commits · Oct 16, 2023

95 changes: 70 additions & 25 deletions README.md
@@ -70,29 +70,26 @@ os.environ["AWS_REGION_NAME"] = ""
llm = AutoLLM(model="anthropic.claude-v2")
```

- ### AutoVectorStore (Supported VectorDBs: Pinecone, Qdrant, InMemory)
+ ### AutoVectorStoreIndex (Supports [20+ VectorDBs](https://docs.llamaindex.ai/en/stable/core_modules/data_modules/storage/vector_stores.html#vector-store-options-feature-support))

- Instantly initialize a VectorDB instance with same API
+ Dynamically initialize a VectorStoreIndex instance from 20+ VectorDB options with the same AutoVectorStoreIndex API

```python
- from autollm import AutoVectorStore
-
- # Dynamically initialize a VectorDB instance
- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="qdrant", index_name="quickstart", size=1536, distance="EUCLID"
- )
+ import qdrant_client
+
+ from autollm import AutoVectorStoreIndex
+
+ # Dynamically initialize a VectorStoreIndex instance with the same AutoVectorStoreIndex API
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="QdrantVectorStore",
+     client=qdrant_client.QdrantClient(
+         url="http://<host>:<port>",
+         api_key="<qdrant-api-key>",
+     ),
+     collection_name="quickstart",
+ )

- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="pinecone",
-     index_name="quickstart",
-     dimension=1536,
-     metric_type="euclidean",
-     pod_type="p1",
- )
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="PineconeVectorStore", pinecone_index=pinecone.Index("quickstart")
+ )

- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="in_memory", path_or_files="path/to/documents"
- )
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="VectorStoreIndex", documents=documents
+ )
```
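
Any vector store class exposed by `llama_index.vector_stores` can be passed the same way. As a minimal sketch (assuming a local `chromadb` client; the collection name is a placeholder):

```python
import chromadb

from autollm import AutoVectorStoreIndex

chroma_client = chromadb.Client()
chroma_collection = chroma_client.get_or_create_collection("quickstart")

vector_store_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="ChromaVectorStore", chroma_collection=chroma_collection
)
```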

### AutoQueryEngine (Creates a query engine pipeline in a single line of code)
@@ -104,9 +101,9 @@ Create robust query engine pipelines with automatic cost logging. Supports fine-
```python
from autollm import AutoQueryEngine

- # Initialize a query engine with existing vector store and service context
- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="in_memory", input_files="path/to/documents"
+ # Initialize a query engine with existing vector store index and service context
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="VectorStoreIndex", documents=documents
)
service_context = AutoServiceContext.from_defaults(enable_cost_calculator=True)
query_engine = AutoQueryEngine.from_instances(vector_store_index, service_context)
@@ -139,15 +136,20 @@ query_engine = AutoQueryEngine.from_parameters(
query_wrapper_prompt="Your Query Wrapper Prompt",
enable_cost_calculator=True,
llm_params={"model": "gpt-3.5-turbo"},
vector_store_params={"vector_store_type": "qdrant", "index_name": "quickstart"},
vector_store_params={"vector_store_type": "QdrantVectorStore", "client": qdrant_client.QdrantClient(
url="http://<host>:<port>"
api_key="<qdrant-api-key>",
), "collection_name": "quickstart"},
service_context_params={"chunk_size": 1024},
query_engine_params={"similarity_top_k": 10},
)

response = query_engine.query("Why is SafeVideo AI awesome?")

print(response.response)
```

```
>> Because they redefine the movie experience by AI!
```

@@ -169,6 +171,39 @@ LLM Total Token Cost: $0.002317
"""
```

### Document Providers (Powerful Github and Local Solutions)

Unlock the potential of your content with AutoLLM's robust document providers. Seamlessly pull, process, and analyze documents from GitHub repositories or local directories.

#### GitHub Document Provider

Fetch up-to-date documents directly from your GitHub repositories—ideal for real-time data pipelines and collaborative projects.

```python
from autollm.utils.document_providers import github_document_provider

git_repo_url = "https://github.com/safevideo.git"
local_repo_path = Path("/safevideo/")
# Specify where to find the documents in the repo
relative_docs_path = Path("docs/")

# Fetch and process documents
documents = github_document_provider(git_repo_url, local_repo_path, relative_docs_path)
```

#### Local Document Provider

Process documents from local directories—ideal for offline data pipelines and local development.

```python
from autollm.utils.document_providers import local_document_provider

input_dir = "/local/documents/path"

# Read files as documents from local directory
documents = local_document_provider(input_dir=input_dir)
```
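
Both providers return llama_index `Document` objects, so their output can feed straight into the rest of the pipeline. A minimal end-to-end sketch (assuming an LLM API key is already configured and the path is a placeholder):

```python
from autollm import AutoQueryEngine
from autollm.utils.document_providers import local_document_provider

documents = local_document_provider(input_dir="/local/documents/path")

# Build an in-memory index over the documents and query it in one call
query_engine = AutoQueryEngine.from_parameters(
    vector_store_params={"vector_store_type": "VectorStoreIndex", "documents": documents})

print(query_engine.query("What do these documents cover?").response)
```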

______________________________________________________________________

## FAQ
@@ -185,10 +220,10 @@ Our roadmap outlines upcoming features and integrations aimed at making QuickLLM

- [ ] **VectorDB Integrations**:

  - [x] Decouple DB index operations from vector store classes
  - [ ] Add utility functions for creating and updating indexes based on local files and llamaindex vector store instances
  - [x] Update AutoVectorStore to support all VectorDB integrations without manual maintenance of vector store classes
  - [x] Update AutoQueryEngine, AutoLLM, and AutoServiceContext to support new AutoVectorStore API

- [ ] **Pipelines**:

@@ -199,6 +234,16 @@ Our roadmap outlines upcoming features and integrations aimed at making QuickLLM

- [ ] FastAPI integration for Pipelines

- [ ] **Tests**:

- [ ] Add unit tests for online vectorDB integrations

- [ ] **Additional Document Providers**:

- [ ] Amazon S3-based document provider
- [ ] FTP-based document provider
- [ ] Google Drive-based document provider

______________________________________________________________________

## Contributing
6 changes: 3 additions & 3 deletions autollm/__init__.py
@@ -1,4 +1,4 @@
"""QuickLLM: A Base Package for Large Language Model Applications.
"""AutoLLM: A Base Package for Large Language Model Applications.

This package provides automated integrations with leading large language models
and vector databases, along with various utility functions.
@@ -11,6 +11,6 @@
from autollm.auto.llm import AutoLLM
from autollm.auto.query_engine import AutoQueryEngine
from autollm.auto.service_context import AutoServiceContext
- from autollm.auto.vector_store import AutoVectorStore
+ from autollm.auto.vector_store_index import AutoVectorStoreIndex

- __all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStore', 'AutoQueryEngine']
+ __all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStoreIndex', 'AutoQueryEngine']
24 changes: 11 additions & 13 deletions autollm/auto/query_engine.py
@@ -1,38 +1,39 @@
- from llama_index import ServiceContext
+ from llama_index import ServiceContext, VectorStoreIndex
from llama_index.indices.query.base import BaseQueryEngine

from autollm.auto.llm import AutoLLM
from autollm.auto.service_context import AutoServiceContext
- from autollm.auto.vector_store import AutoVectorStore
- from autollm.vectorstores.base import BaseVS
+ from autollm.auto.vector_store_index import AutoVectorStoreIndex


class AutoQueryEngine:
"""AutoQueryEngine for query execution and optionally logging the query cost."""

@staticmethod
- def from_instances(vector_store: BaseVS, service_context: ServiceContext, **kwargs) -> BaseQueryEngine:
+ def from_instances(
+         vector_store_index: VectorStoreIndex, service_context: ServiceContext,
+         **kwargs) -> BaseQueryEngine:
"""
- Create an AutoQueryEngine from a vector store and a service context.
+ Create an AutoQueryEngine from a vector store index and a service context.

Parameters:
- vector_store: Vector store instance.
+ vector_store_index: Vector store index instance.
service_context: Service context instance.
**kwargs: Keyword arguments for the query engine.

Returns:
An AutoQueryEngine instance.
"""

- return vector_store.vectorindex.as_query_engine(service_context=service_context, **kwargs)
+ return vector_store_index.as_query_engine(service_context=service_context, **kwargs)

@staticmethod
def from_parameters(
system_prompt: str = None,
query_wrapper_prompt: str = None,
enable_cost_calculator: bool = True,
llm_params: dict = None,
vector_store_params: dict = {"vector_store_type": "in_memory"},
vector_store_params: dict = {"vector_store_type": "VectorStoreIndex"},
service_context_params: dict = None,
query_engine_params: dict = None) -> BaseQueryEngine:
"""
@@ -57,15 +58,12 @@ def from_parameters(
query_engine_params = {} if query_engine_params is None else query_engine_params

llm = AutoLLM.from_defaults(**llm_params)
- vector_store = AutoVectorStore.from_defaults(**vector_store_params)
- vector_store.initialize_vectorindex()
- vector_store.connect_vectorstore()
+ vector_store_index = AutoVectorStoreIndex.from_defaults(**vector_store_params)
service_context = AutoServiceContext.from_defaults(
llm=llm,
system_prompt=system_prompt,
query_wrapper_prompt=query_wrapper_prompt,
enable_cost_calculator=enable_cost_calculator,
**service_context_params)

- return vector_store.vectorindex.as_query_engine(
-     service_context=service_context, **query_engine_params)
+ return vector_store_index.as_query_engine(service_context=service_context, **query_engine_params)
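
After this refactor, `from_instances` takes a plain llama_index `VectorStoreIndex` rather than the old `BaseVS` wrapper, and extra keyword arguments are forwarded to `as_query_engine`. A minimal sketch of the new call pattern (assuming `documents` is already loaded):

```python
from autollm import AutoQueryEngine, AutoServiceContext, AutoVectorStoreIndex

vector_store_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="VectorStoreIndex", documents=documents)
service_context = AutoServiceContext.from_defaults(enable_cost_calculator=True)

# similarity_top_k is forwarded to VectorStoreIndex.as_query_engine
query_engine = AutoQueryEngine.from_instances(
    vector_store_index, service_context, similarity_top_k=10)
```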
60 changes: 0 additions & 60 deletions autollm/auto/vector_store.py

This file was deleted.

51 changes: 51 additions & 0 deletions autollm/auto/vector_store_index.py
@@ -0,0 +1,51 @@
from typing import Optional, Sequence

from llama_index import Document, VectorStoreIndex


def import_vector_store_class(vector_store_class_name: str):
    """
    Imports a predefined vector store class by class name.

    Args:
        vector_store_class_name (str): Name of the llama_index vector store class to import (e.g., 'PineconeVectorStore').

    Returns:
        The imported VectorStore class.
    """
    module = __import__("llama_index.vector_stores", fromlist=[vector_store_class_name])
    class_ = getattr(module, vector_store_class_name)
    return class_


class AutoVectorStoreIndex:
    """AutoVectorStoreIndex lets you dynamically initialize any Vector Store index based on the vector store
    class name and additional parameters.
    """

    @staticmethod
    def from_defaults(
            vector_store_type: str,
            documents: Optional[Sequence[Document]] = None,
            *args,
            **kwargs) -> VectorStoreIndex:
        """
        Initializes a Vector Store index from Vector Store type and additional parameters.

        Parameters:
            vector_store_type (str): The class name of the vector store (e.g., 'PineconeVectorStore', 'VectorStoreIndex')
            documents (Optional[Sequence[Document]]): Documents to initialize the in-memory vector store index from.
            *args: Additional positional arguments for initializing the vector store
            **kwargs: Additional parameters for initializing the vector store

        Returns:
            index (VectorStoreIndex): The initialized Vector Store index instance for the given vector store type and parameter set.
        """
        if documents is None:
            documents = [Document.example()]
        if vector_store_type == "VectorStoreIndex":
            # documents is already a sequence, so it is passed through directly
            index = VectorStoreIndex.from_documents(documents=documents, *args, **kwargs)
        else:
            VectorStoreClass = import_vector_store_class(vector_store_type)
            # args/kwargs configure the vector store itself, not from_vector_store
            vector_store = VectorStoreClass(*args, **kwargs)
            index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

        return index
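
A short sketch of the two code paths above, the in-memory default and a named vector store class (Qdrant connection values are placeholders):

```python
import qdrant_client
from llama_index import Document

from autollm import AutoVectorStoreIndex

# In-memory path: VectorStoreIndex.from_documents is called directly
in_memory_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="VectorStoreIndex", documents=[Document.example()])

# Named-class path: QdrantVectorStore is imported from llama_index.vector_stores,
# instantiated with the remaining kwargs, and wrapped via from_vector_store
qdrant_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="QdrantVectorStore",
    client=qdrant_client.QdrantClient(url="http://<host>:<port>", api_key="<qdrant-api-key>"),
    collection_name="quickstart")
```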