safevideo · fcakyon · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/autollm/__init__.py b/autollm/__init__.py
@@ -1,4 +1,4 @@
-"""QuickLLM: A Base Package for Large Language Model Applications.
+"""AutoLLM: A Base Package for Large Language Model Applications.
 
 This package provides automated integrations with leading large language models
 and vector databases, along with various utility functions.
@@ -11,6 +11,6 @@
 from autollm.auto.llm import AutoLLM
 from autollm.auto.query_engine import AutoQueryEngine
 from autollm.auto.service_context import AutoServiceContext
-from autollm.auto.vector_store import AutoVectorStore
+from autollm.auto.vector_store_index import AutoVectorStoreIndex
 
-__all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStore', 'AutoQueryEngine']
+__all__ = ['AutoLLM', 'AutoServiceContext', 'AutoVectorStoreIndex', 'AutoQueryEngine']
diff --git a/autollm/auto/query_engine.py b/autollm/auto/query_engine.py
@@ -1,38 +1,39 @@
-from llama_index import ServiceContext
+from llama_index import ServiceContext, VectorStoreIndex
 from llama_index.indices.query.base import BaseQueryEngine
 
 from autollm.auto.llm import AutoLLM
 from autollm.auto.service_context import AutoServiceContext
-from autollm.auto.vector_store import AutoVectorStore
-from autollm.vectorstores.base import BaseVS
+from autollm.auto.vector_store_index import AutoVectorStoreIndex
 
 
 class AutoQueryEngine:
     """AutoQueryEngine for query execution and optionally logging the query cost."""
 
     @staticmethod
-    def from_instances(vector_store: BaseVS, service_context: ServiceContext, **kwargs) -> BaseQueryEngine:
+    def from_instances(
+            vector_store_index: VectorStoreIndex, service_context: ServiceContext,
+            **kwargs) -> BaseQueryEngine:
         """
-        Create an AutoQueryEngine from a vector store and a service context.
+        Create a query engine from an existing vector store index and a service context.
 
         Parameters:
-            vector_store: Vector store instance.
+            vector_store_index (VectorStoreIndex): Index whose as_query_engine() builds the query engine.
             service_context: Service context instance.
             **kwargs: Keyword arguments for the query engine.
 
         Returns:
             An AutoQueryEngine instance.
         """
 
-        return vector_store.vectorindex.as_query_engine(service_context=service_context, **kwargs)
+        return vector_store_index.as_query_engine(service_context=service_context, **kwargs)
 
     @staticmethod
     def from_parameters(
             system_prompt: str = None,
             query_wrapper_prompt: str = None,
             enable_cost_calculator: bool = True,
             llm_params: dict = None,
-            vector_store_params: dict = {"vector_store_type": "in_memory"},
+            vector_store_params: dict = {"vector_store_type": "VectorStoreIndex"},
             service_context_params: dict = None,
             query_engine_params: dict = None) -> BaseQueryEngine:
         """
@@ -57,15 +58,12 @@ def from_parameters(
         query_engine_params = {} if query_engine_params is None else query_engine_params
 
         llm = AutoLLM.from_defaults(**llm_params)
-        vector_store = AutoVectorStore.from_defaults(**vector_store_params)
-        vector_store.initialize_vectorindex()
-        vector_store.connect_vectorstore()
+        vector_store_index = AutoVectorStoreIndex.from_defaults(**vector_store_params)
         service_context = AutoServiceContext.from_defaults(
             llm=llm,
             system_prompt=system_prompt,
             query_wrapper_prompt=query_wrapper_prompt,
             enable_cost_calculator=enable_cost_calculator,
             **service_context_params)
 
-        return vector_store.vectorindex.as_query_engine(
-            service_context=service_context, **query_engine_params)
+        return vector_store_index.as_query_engine(service_context=service_context, **query_engine_params)
diff --git a/autollm/auto/vector_store.py b/autollm/auto/vector_store.py
diff --git a/autollm/auto/vector_store_index.py b/autollm/auto/vector_store_index.py
@@ -0,0 +1,48 @@
+from typing import Optional, Sequence
+
+from llama_index import Document, VectorStoreIndex
+
+
+def import_vector_store_class(vector_store_class_name: str):
+    """
+    Look up a vector store class exported by ``llama_index.vector_stores``.
+
+    Args:
+        vector_store_class_name (str): Name of the class to fetch (e.g. 'PineconeVectorStore').
+    Returns:
+        The class object itself (not an instance).
+    """
+    vector_stores = __import__("llama_index.vector_stores", fromlist=[vector_store_class_name])
+    return getattr(vector_stores, vector_store_class_name)
+
+
+class AutoVectorStoreIndex:
+    """Build a llama_index ``VectorStoreIndex`` from a vector store class name and its
+    constructor parameters.
+    """
+
+    @staticmethod
+    def from_defaults(
+            vector_store_type: str,
+            documents: Optional[Sequence[Document]] = None,
+            *args,
+            **kwargs) -> VectorStoreIndex:
+        """
+        Initializes a Vector Store index from Vector Store type and additional parameters.
+
+        Parameters:
+            vector_store_type (str): 'VectorStoreIndex' for the default in-memory index, or a class name
+                exported by llama_index.vector_stores (e.g., 'PineconeVectorStore').
+            documents (Optional[Sequence[Document]]): Documents for the in-memory index; None means empty.
+            *args, **kwargs: Passed to VectorStoreIndex.from_documents (in-memory) or to the vector
+                store class constructor (any other type).
+
+        Returns:
+            index (VectorStoreIndex): The initialized index for the given type and parameter set.
+        """
+        if vector_store_type == "VectorStoreIndex":
+            # Pass the sequence itself: wrapping it in a list nests it (or yields [None]).
+            return VectorStoreIndex.from_documents(list(documents or []), *args, **kwargs)
+        # from_vector_store expects a store instance, so instantiate the imported class first.
+        vector_store = import_vector_store_class(vector_store_type)(*args, **kwargs)
+        return VectorStoreIndex.from_vector_store(vector_store=vector_store)
diff --git a/autollm/utils/db_utils.py b/autollm/utils/db_utils.py
@@ -1,70 +1,159 @@
+# db_utils.py
 import logging
 from typing import Sequence
 
-from llama_index import Document
+import pinecone
+from llama_index import Document, StorageContext, VectorStoreIndex
+from llama_index.vector_stores import PineconeVectorStore, QdrantVectorStore
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams
 
-from autollm.auto.vector_store import AutoVectorStore
-from autollm.utils.constants import DEFAULT_INDEX_NAME, DEFAULT_VECTORE_STORE_TYPE
+from autollm.auto.vector_store_index import AutoVectorStoreIndex
+from autollm.utils.constants import DEFAULT_INDEX_NAME
+from autollm.utils.env_utils import read_env_variable
 from autollm.utils.hash_utils import check_for_changes
 
 logger = logging.getLogger(__name__)
 
 
-def initialize_database(
-        documents: Sequence[Document], vectore_store_type: str = DEFAULT_VECTORE_STORE_TYPE) -> None:
+def initialize_pinecone_index(
+        index_name: str, dimension: int = 1536, metric: str = 'euclidean', pod_type: str = 'p1'):
+    """Create a Pinecone index named `index_name`, initializing the client with the
+    PINECONE_API_KEY and PINECONE_ENVIRONMENT values returned by read_env_variable."""
+    pinecone.init(
+        api_key=read_env_variable('PINECONE_API_KEY'),
+        environment=read_env_variable('PINECONE_ENVIRONMENT'),
+    )
+    pinecone.create_index(index_name, dimension=dimension, metric=metric, pod_type=pod_type)
+
+
+def initialize_qdrant_index(index_name: str, size: int = 1536, distance: str = 'EUCLID'):
+    """(Re)create the Qdrant collection `index_name` for vectors of length `size`.
+
+    `distance` is the name of a `qdrant_client.models.Distance` member (default 'EUCLID').
+    The client is built from the QDRANT_URL and QDRANT_API_KEY values returned by read_env_variable.
+    """
+    qdrant = QdrantClient(
+        url=read_env_variable('QDRANT_URL'), api_key=read_env_variable('QDRANT_API_KEY'))
+    # Enum lookup by member name, e.g. 'EUCLID' -> Distance.EUCLID.
+    metric = Distance[distance]
+    vectors_config = VectorParams(size=size, distance=metric)
+    qdrant.recreate_collection(collection_name=index_name, vectors_config=vectors_config)
+
+
+def connect_vectorstore(vector_store, **params):
+    """Point an existing vector store at its backend: Pinecone stores get `pinecone_index` set from
+    params['index_name'], Qdrant stores get `client` built from params['url'] and params['api_key']."""
+    if isinstance(vector_store, PineconeVectorStore):
+        vector_store.pinecone_index = pinecone.Index(params['index_name'])
+    elif isinstance(vector_store, QdrantVectorStore):
+        vector_store.client = QdrantClient(url=params['url'], api_key=params['api_key'])
+    # TODO: Add more elif conditions for other vector stores as needed
+
+
+def update_vector_store_index(vector_store_index: VectorStoreIndex, documents: Sequence[Document]):
     """
-    Initializes the vector database for the first time from given documents.
+    Upsert documents: remove each document's current entry (by `id_`), then insert it again.
 
     Parameters:
-        documents (Sequence[Document]): List of documents to initialize the vector store with.
-        vectore_store_type (str): Type of vector store to use ('qdrant', 'pinecone', etc.).
+        vector_store_index (VectorStoreIndex): Index to update, e.g. one built by AutoVectorStoreIndex.from_defaults.
+        documents (Sequence[Document]): Documents to insert or replace.
 
     Returns:
         None
     """
-    logger.info('Initializing vector store')
+    for document in documents:
+        delete_documents_by_id(vector_store_index, [document.id_])
+        vector_store_index.insert(document)
+
 
-    # Create a new index and connect to it
-    vector_store = AutoVectorStore.from_defaults(
-        vector_store_type=vectore_store_type, collection_name=DEFAULT_INDEX_NAME)
-    vector_store.initialize_vectorindex()
-    vector_store.connect_vectorstore()
+def overwrite_vectorindex(vector_store, documents: Sequence[Document]):
+    """
+    Build a new index on top of `vector_store`, inserting `documents` into the store.
 
-    logger.info('Updating vector store with documents')
+    Parameters:
+        vector_store: Vector store instance handed to StorageContext.from_defaults (e.g. a PineconeVectorStore).
+        documents (Sequence[Document]): Documents to index and write to the store.
 
-    # Update the index with the documents
-    vector_store.overwrite_vectorindex(documents)
+    Returns:
+        None
+    """
+    # Wrap the store in a storage context. NOTE(review): nothing here clears existing entries - confirm "overwrite".
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
-    logger.info('Vector database successfully initialized.')
+    # Create index, which will insert documents/vectors to vector store
+    _ = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
 
-def update_database(documents: Sequence[Document], vectore_store_type: str) -> None:
+def delete_documents_by_id(vector_store_index: VectorStoreIndex, document_ids: Sequence[str]):
     """
-    Update the vector database to synchronize it with the provided list of documents.
-
-    This function performs the following actions:
-    1. Updates or adds new documents in the vector database that match the input list.
-    2. Removes any documents from the vector database that are not present in the input list.
+    Delete documents from the index (and its docstore) by their ref doc ids.
 
     Parameters:
-        documents (Sequence[Document]): Complete set of documents that should exist in the vector database after the update.
-        vectore_store_type (str): Specifies the type of vector store to use (e.g., 'qdrant', 'pinecone'). Defaults to DEFAULT_VECTORE_STORE_TYPE.
+        vector_store_index (VectorStoreIndex): Index to delete from.
+        document_ids (Sequence[str]): Ids of the documents to delete; an empty sequence is a no-op.
 
     Returns:
         None
-
-    Note:
-        Ensure that the 'documents' list includes all documents that should remain in the database, as any missing items will be deleted.
     """
-    logger.info('Updating vector store')
+    # Nothing to delete: return before touching the index.
+    if not document_ids:
+        return
+
+    # Remove each document and, via delete_from_docstore=True, its docstore entry.
+    for document_id in document_ids:
+        vector_store_index.delete_ref_doc(document_id, delete_from_docstore=True)
+
+
+# TODO: refactor and update.
+# def initialize_database(
+#         documents: Sequence[Document], vector_store_class_name: str, **vector_store_params) -> None:
+#     logger.info('Initializing vector store')
+
+#     vector_store = AutoVectorStore.from_defaults(vector_store_class_name, **vector_store_params)
+
+#     if vector_store_class_name == 'PineconeVectorStore':
+#         initialize_pinecone_index(vector_store, **vector_store_params)
+#     elif vector_store_class_name == 'QdrantVectorStore':
+#         initialize_qdrant_index(vector_store, **vector_store_params)
+#     # TODO: Add more elif conditions for other vector stores as needed
+
+#     connect_vectorstore(vector_store, **vector_store_params)
+
+#     logger.info('Updating vector store with documents')
+
+#     update_vector_store_index(vector_store, documents)
+
+#     logger.info('Vector database successfully initialized.')
+
+# # TODO: refactor and update.
+# def update_database(documents: Sequence[Document], vectore_store_type: str) -> None:
+#     """
+#     Update the vector database to synchronize it with the provided list of documents.
+
+#     This function performs the following actions:
+#     1. Updates or adds new documents in the vector database that match the input list.
+#     2. Removes any documents from the vector database that are not present in the input list.
+
+#     Parameters:
+#         documents (Sequence[Document]): Complete set of documents that should exist in the vector database after the update.
+#         vectore_store_type (str): Specifies the type of vector store to use (e.g., 'qdrant', 'pinecone'). Defaults to DEFAULT_VECTORE_STORE_TYPE.
+
+#     Returns:
+#         None
+
+#     Note:
+#         Ensure that the 'documents' list includes all documents that should remain in the database, as any missing items will be deleted.
+#     """
+#     logger.info('Updating vector store')
 
-    # Get changed document ids using the hash of the documents available in the vector store index item metadata
-    vector_store = AutoVectorStore.from_defaults(
-        vector_store_type=vectore_store_type, index_name=DEFAULT_INDEX_NAME)
-    changed_documents, deleted_document_ids = check_for_changes(documents, vector_store)
+#     # Get changed document ids using the hash of the documents available in the vector store index item metadata
+#     vector_store = AutoVectorStore.from_defaults(
+#         vector_store_type=vectore_store_type, index_name=DEFAULT_INDEX_NAME)
+#     changed_documents, deleted_document_ids = check_for_changes(documents, vector_store)
 
-    # Update the index with the changed documents
-    vector_store.update_vectorindex(changed_documents)
-    vector_store.delete_documents_by_id(deleted_document_ids)
+#     # Update the index with the changed documents
+#     vector_store.update_vectorindex(changed_documents)
+#     vector_store.delete_documents_by_id(deleted_document_ids)
 
-    logger.info('Vector database successfully updated.')
+#     logger.info('Vector database successfully updated.')
diff --git a/autollm/vectorstores/__init__.py b/autollm/vectorstores/__init__.py