This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

refactor AutoVectorStore and move methods to db_utils #11

Merged · 9 commits · Oct 16, 2023

95 changes: 70 additions & 25 deletions README.md
@@ -70,29 +70,26 @@ os.environ["AWS_REGION_NAME"] = ""
llm = AutoLLM(model="anthropic.claude-v2")
```

- ### AutoVectorStore (Supported VectorDBs: Pinecone, Qdrant, InMemory)
+ ### AutoVectorStoreIndex (Supports [20+ VectorDBs](https://docs.llamaindex.ai/en/stable/core_modules/data_modules/storage/vector_stores.html#vector-store-options-feature-support))

- Instantly initialize a VectorDB instance with same API
+ Dynamically initialize a VectorStoreIndex instance from 20+ VectorDB options with the same AutoVectorStoreIndex API

```python
- from autollm import AutoVectorStore
-
- # Dynamically initialize a VectorDB instance
- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="qdrant", index_name="quickstart", size=1536, distance="EUCLID"
- )
+ import qdrant_client
+
+ from autollm import AutoVectorStoreIndex
+
+ # Dynamically initialize a VectorStoreIndex instance with the same AutoVectorStoreIndex API
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="QdrantVectorStore",
+     client=qdrant_client.QdrantClient(
+         url="http://<host>:<port>",
+         api_key="<qdrant-api-key>",
+     ),
+     collection_name="quickstart",
+ )

- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="pinecone",
-     index_name="quickstart",
-     dimension=1536,
-     metric_type="euclidean",
-     pod_type="p1",
- )
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="PineconeVectorStore", pinecone_index=pinecone.Index("quickstart")
+ )

- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="in_memory", path_or_files="path/to/documents"
- )
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="VectorStoreIndex", documents=documents
+ )
```
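
Any vector store class exposed by `llama_index.vector_stores` can be passed the same way. As a minimal sketch (assuming a local `chromadb` client; the collection name is a placeholder):

```python
import chromadb

from autollm import AutoVectorStoreIndex

chroma_client = chromadb.Client()
chroma_collection = chroma_client.get_or_create_collection("quickstart")

vector_store_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="ChromaVectorStore", chroma_collection=chroma_collection
)
```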

### AutoQueryEngine (Creates a query engine pipeline in a single line of code)
@@ -104,9 +101,9 @@ Create robust query engine pipelines with automatic cost logging. Supports fine-
```python
from autollm import AutoQueryEngine

- # Initialize a query engine with existing vector store and service context
- vector_store = AutoVectorStore.from_defaults(
-     vector_store_type="in_memory", input_files="path/to/documents"
+ # Initialize a query engine with existing vector store index and service context
+ vector_store_index = AutoVectorStoreIndex.from_defaults(
+     vector_store_type="VectorStoreIndex", documents=documents
)
service_context = AutoServiceContext.from_defaults(enable_cost_calculator=True)
query_engine = AutoQueryEngine.from_instances(vector_store_index, service_context)
@@ -139,15 +136,20 @@ query_engine = AutoQueryEngine.from_parameters(
query_wrapper_prompt="Your Query Wrapper Prompt",
enable_cost_calculator=True,
llm_params={"model": "gpt-3.5-turbo"},
vector_store_params={"vector_store_type": "qdrant", "index_name": "quickstart"},
vector_store_params={"vector_store_type": "QdrantVectorStore", "client": qdrant_client.QdrantClient(
url="http://<host>:<port>"
api_key="<qdrant-api-key>",
), "collection_name": "quickstart"},
service_context_params={"chunk_size": 1024},
query_engine_params={"similarity_top_k": 10},
)

response = query_engine.query("Why is SafeVideo AI awesome?")

print(response.response)
```

```
>> Because they redefine the movie experience by AI!
```

@@ -169,6 +171,39 @@ LLM Total Token Cost: $0.002317
"""
```

### Document Providers (Powerful Github and Local Solutions)

Unlock the potential of your content with AutoLLM's robust document providers. Seamlessly pull, process, and analyze documents from GitHub repositories or local directories.

#### GitHub Document Provider

Fetch up-to-date documents directly from your GitHub repositories—ideal for real-time data pipelines and collaborative projects.

```python
from autollm.utils.document_providers import github_document_provider

git_repo_url = "https://github.com/safevideo.git"
local_repo_path = Path("/safevideo/")
# Specify where to find the documents in the repo
relative_docs_path = Path("docs/")

# Fetch and process documents
documents = github_document_provider(git_repo_url, local_repo_path, relative_docs_path)
```

#### Local Document Provider

Process documents from local directories—ideal for offline data pipelines and local development.

```python
from autollm.utils.document_providers import local_document_provider

input_dir = "/local/documents/path"

# Read files as documents from local directory
documents = local_document_provider(input_dir=input_dir)
```
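
Both providers return llama_index `Document` objects, so their output can feed straight into the rest of the pipeline. A minimal end-to-end sketch (assuming an LLM API key is already configured and the path is a placeholder):

```python
from autollm import AutoQueryEngine
from autollm.utils.document_providers import local_document_provider

documents = local_document_provider(input_dir="/local/documents/path")

# Build an in-memory index over the documents and query it in one call
query_engine = AutoQueryEngine.from_parameters(
    vector_store_params={"vector_store_type": "VectorStoreIndex", "documents": documents})

print(query_engine.query("What do these documents cover?").response)
```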

______________________________________________________________________

## FAQ
@@ -185,10 +220,10 @@ Our roadmap outlines upcoming features and integrations aimed at making QuickLLM

- [ ] **VectorDB Integrations**:

  - [x] Decouple DB index operations from vector store classes
  - [ ] Add utility functions for creating and updating indexes based on local files and llamaindex vector store instances
  - [x] Update AutoVectorStore to support all VectorDB integrations without manual maintenance of vector store classes
  - [x] Update AutoQueryEngine, AutoLLM, and AutoServiceContext to support new AutoVectorStore API

- [ ] **Pipelines**:

@@ -199,6 +234,16 @@ Our roadmap outlines upcoming features and integrations aimed at making QuickLLM

- [ ] FastAPI integration for Pipelines

- [ ] **Tests**:

- [ ] Add unit tests for online vectorDB integrations

- [ ] **Additional Document Providers**:

- [ ] Amazon S3-based document provider
- [ ] FTP-based document provider
- [ ] Google Drive-based document provider

______________________________________________________________________

## Contributing
6 changes: 3 additions & 3 deletions autollm/__init__.py
@@ -1,4 +1,4 @@
"""QuickLLM: A Base Package for Large Language Model Applications.
"""AutoLLM: A Base Package for Large Language Model Applications.

This package provides automated integrations with leading large language models
and vector databases, along with various utility functions.
@@ -11,6 +11,6 @@
from autollm.auto.llm import AutoLLM
from autollm.auto.query_engine import AutoQueryEngine
from autollm.auto.service_context import AutoServiceContext
- from autollm.auto.vector_store import AutoVectorStore
+ from autollm.auto.vector_store_index import AutoVectorStoreIndex

- __all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStore', 'AutoQueryEngine']
+ __all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStoreIndex', 'AutoQueryEngine']
24 changes: 11 additions & 13 deletions autollm/auto/query_engine.py
@@ -1,38 +1,39 @@
- from llama_index import ServiceContext
+ from llama_index import ServiceContext, VectorStoreIndex
from llama_index.indices.query.base import BaseQueryEngine

from autollm.auto.llm import AutoLLM
from autollm.auto.service_context import AutoServiceContext
- from autollm.auto.vector_store import AutoVectorStore
- from autollm.vectorstores.base import BaseVS
+ from autollm.auto.vector_store_index import AutoVectorStoreIndex


class AutoQueryEngine:
"""AutoQueryEngine for query execution and optionally logging the query cost."""

@staticmethod
- def from_instances(vector_store: BaseVS, service_context: ServiceContext, **kwargs) -> BaseQueryEngine:
+ def from_instances(
+         vector_store_index: VectorStoreIndex, service_context: ServiceContext,
+         **kwargs) -> BaseQueryEngine:
"""
- Create an AutoQueryEngine from a vector store and a service context.
+ Create an AutoQueryEngine from a vector store index and a service context.

Parameters:
- vector_store: Vector store instance.
+ vector_store_index: Vector store index instance.
service_context: Service context instance.
**kwargs: Keyword arguments for the query engine.

Returns:
An AutoQueryEngine instance.
"""

- return vector_store.vectorindex.as_query_engine(service_context=service_context, **kwargs)
+ return vector_store_index.as_query_engine(service_context=service_context, **kwargs)

@staticmethod
def from_parameters(
system_prompt: str = None,
query_wrapper_prompt: str = None,
enable_cost_calculator: bool = True,
llm_params: dict = None,
vector_store_params: dict = {"vector_store_type": "in_memory"},
vector_store_params: dict = {"vector_store_type": "VectorStoreIndex"},
service_context_params: dict = None,
query_engine_params: dict = None) -> BaseQueryEngine:
"""
@@ -57,15 +58,12 @@ def from_parameters(
query_engine_params = {} if query_engine_params is None else query_engine_params

llm = AutoLLM.from_defaults(**llm_params)
- vector_store = AutoVectorStore.from_defaults(**vector_store_params)
- vector_store.initialize_vectorindex()
- vector_store.connect_vectorstore()
+ vector_store_index = AutoVectorStoreIndex.from_defaults(**vector_store_params)
service_context = AutoServiceContext.from_defaults(
llm=llm,
system_prompt=system_prompt,
query_wrapper_prompt=query_wrapper_prompt,
enable_cost_calculator=enable_cost_calculator,
**service_context_params)

- return vector_store.vectorindex.as_query_engine(
-     service_context=service_context, **query_engine_params)
+ return vector_store_index.as_query_engine(service_context=service_context, **query_engine_params)
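
After this refactor, `from_instances` takes a plain llama_index `VectorStoreIndex` rather than the old `BaseVS` wrapper, and extra keyword arguments are forwarded to `as_query_engine`. A minimal sketch of the new call pattern (assuming `documents` is already loaded):

```python
from autollm import AutoQueryEngine, AutoServiceContext, AutoVectorStoreIndex

vector_store_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="VectorStoreIndex", documents=documents)
service_context = AutoServiceContext.from_defaults(enable_cost_calculator=True)

# similarity_top_k is forwarded to VectorStoreIndex.as_query_engine
query_engine = AutoQueryEngine.from_instances(
    vector_store_index, service_context, similarity_top_k=10)
```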
60 changes: 0 additions & 60 deletions autollm/auto/vector_store.py

This file was deleted.

51 changes: 51 additions & 0 deletions autollm/auto/vector_store_index.py
@@ -0,0 +1,51 @@
from typing import Optional, Sequence

from llama_index import Document, VectorStoreIndex


def import_vector_store_class(vector_store_class_name: str):
    """
    Imports a predefined vector store class by class name.

    Args:
        vector_store_class_name (str): Name of the llama_index vector store class to import (e.g., 'PineconeVectorStore').

    Returns:
        The imported VectorStore class.
    """
    module = __import__("llama_index.vector_stores", fromlist=[vector_store_class_name])
    class_ = getattr(module, vector_store_class_name)
    return class_


class AutoVectorStoreIndex:
    """AutoVectorStoreIndex lets you dynamically initialize any Vector Store index based on the vector store
    class name and additional parameters.
    """

    @staticmethod
    def from_defaults(
            vector_store_type: str,
            documents: Optional[Sequence[Document]] = None,
            *args,
            **kwargs) -> VectorStoreIndex:
        """
        Initializes a Vector Store index from Vector Store type and additional parameters.

        Parameters:
            vector_store_type (str): The class name of the vector store (e.g., 'PineconeVectorStore', 'VectorStoreIndex')
            documents (Optional[Sequence[Document]]): Documents to initialize the in-memory vector store index from.
            *args: Additional positional arguments for initializing the vector store
            **kwargs: Additional parameters for initializing the vector store

        Returns:
            index (VectorStoreIndex): The initialized Vector Store index instance for the given vector store type and parameter set.
        """
        if documents is None:
            documents = [Document.example()]
        if vector_store_type == "VectorStoreIndex":
            # documents is already a sequence, so it is passed through directly
            index = VectorStoreIndex.from_documents(documents=documents, *args, **kwargs)
        else:
            VectorStoreClass = import_vector_store_class(vector_store_type)
            # args/kwargs configure the vector store itself, not from_vector_store
            vector_store = VectorStoreClass(*args, **kwargs)
            index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

        return index
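
A short sketch of the two code paths above, the in-memory default and a named vector store class (Qdrant connection values are placeholders):

```python
import qdrant_client
from llama_index import Document

from autollm import AutoVectorStoreIndex

# In-memory path: VectorStoreIndex.from_documents is called directly
in_memory_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="VectorStoreIndex", documents=[Document.example()])

# Named-class path: QdrantVectorStore is imported from llama_index.vector_stores,
# instantiated with the remaining kwargs, and wrapped via from_vector_store
qdrant_index = AutoVectorStoreIndex.from_defaults(
    vector_store_type="QdrantVectorStore",
    client=qdrant_client.QdrantClient(url="http://<host>:<port>", api_key="<qdrant-api-key>"),
    collection_name="quickstart")
```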