Skip to content
This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

Commit

Permalink
refactor llm_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
fcakyon committed Oct 15, 2023
1 parent 789faa3 commit 87253fc
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 61 deletions.
70 changes: 70 additions & 0 deletions autollm/utils/db_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import logging
from typing import Sequence

from llama_index import Document

from autollm.auto.vector_store import AutoVectorStore
from autollm.utils.constants import DEFAULT_INDEX_NAME, DEFAULT_VECTORE_STORE_TYPE
from autollm.utils.hash_utils import check_for_changes

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


def initialize_database(
        documents: Sequence[Document],
        vectore_store_type: str = DEFAULT_VECTORE_STORE_TYPE) -> None:
    """
    Build the vector database for the first time from the supplied documents.

    A store is obtained from ``AutoVectorStore.from_defaults`` (using
    ``DEFAULT_INDEX_NAME`` as the collection name), its vector index is
    initialized and connected, and ``documents`` are then handed to
    ``overwrite_vectorindex``.

    Parameters:
        documents (Sequence[Document]): Documents to load into the vector store.
        vectore_store_type (str): Type of vector store to use (e.g. 'qdrant', 'pinecone').

    Returns:
        None
    """
    logger.info('Initializing vector store')

    # NOTE(review): update_database passes index_name= instead of
    # collection_name= to from_defaults; confirm which keyword is intended.
    store = AutoVectorStore.from_defaults(
        vector_store_type=vectore_store_type,
        collection_name=DEFAULT_INDEX_NAME,
    )

    # The index has to exist and be connected before documents are written.
    store.initialize_vectorindex()
    store.connect_vectorstore()

    logger.info('Updating vector store with documents')
    store.overwrite_vectorindex(documents)

    logger.info('Vector database successfully initialized.')


def update_database(
        documents: Sequence[Document],
        vectore_store_type: str = DEFAULT_VECTORE_STORE_TYPE) -> None:
    """
    Update the vector database to synchronize it with the provided list of documents.

    This function performs the following actions:
    1. Updates or adds the documents that ``check_for_changes`` reports as changed.
    2. Deletes, by id, the documents that ``check_for_changes`` reports as deleted.

    Parameters:
        documents (Sequence[Document]): Complete set of documents that should exist in the
            vector database after the update.
        vectore_store_type (str): Specifies the type of vector store to use
            (e.g., 'qdrant', 'pinecone'). Defaults to DEFAULT_VECTORE_STORE_TYPE.

    Returns:
        None

    Note:
        Ensure that the 'documents' list includes all documents that should remain in the
        database, as any missing items will be deleted.
    """
    logger.info('Updating vector store')

    # NOTE(review): initialize_database passes collection_name= instead of
    # index_name= to from_defaults; confirm which keyword is intended.
    vector_store = AutoVectorStore.from_defaults(
        vector_store_type=vectore_store_type, index_name=DEFAULT_INDEX_NAME)

    # Work out which documents changed and which ids should be removed by
    # comparing the input against the vector store.
    changed_documents, deleted_document_ids = check_for_changes(documents, vector_store)

    # Apply updates first, then remove the documents that are no longer wanted.
    vector_store.update_vectorindex(changed_documents)
    vector_store.delete_documents_by_id(deleted_document_ids)

    logger.info('Vector database successfully updated.')
61 changes: 0 additions & 61 deletions autollm/utils/llm_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# Desc: Utility functions for llama index.
import logging
from typing import Sequence

Expand All @@ -13,66 +12,6 @@
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


def initialize_database(
        documents: Sequence[Document],
        vectore_store_type: str = DEFAULT_VECTORE_STORE_TYPE) -> None:
    """
    Create the vector database for the first time and fill it with documents.

    The store comes from ``AutoVectorStore.from_defaults`` with
    ``DEFAULT_INDEX_NAME`` as the collection name. Its vector index is
    initialized and connected before ``documents`` are passed to
    ``overwrite_vectorindex``.

    Parameters:
        documents (Sequence[Document]): Documents to initialize the vector store with.
        vectore_store_type (str): Type of vector store to use (e.g. 'qdrant', 'pinecone').

    Returns:
        None
    """
    logger.info('Initializing vector store')

    # Set up a fresh index and open a connection to it.
    db = AutoVectorStore.from_defaults(
        vector_store_type=vectore_store_type,
        collection_name=DEFAULT_INDEX_NAME,
    )
    db.initialize_vectorindex()
    db.connect_vectorstore()

    logger.info('Updating vector store with documents')

    # Write the given documents into the newly created index.
    db.overwrite_vectorindex(documents)

    logger.info('Vector database successfully initialized.')


def update_database(
        documents: Sequence[Document],
        vectore_store_type: str = DEFAULT_VECTORE_STORE_TYPE) -> None:
    """
    Update the vector database to synchronize it with the provided list of documents.

    This function performs the following actions:
    1. Updates or adds the documents that ``check_for_changes`` reports as changed.
    2. Deletes, by id, the documents that ``check_for_changes`` reports as deleted.

    Parameters:
        documents (Sequence[Document]): Complete set of documents that should exist in the
            vector database after the update.
        vectore_store_type (str): Specifies the type of vector store to use
            (e.g., 'qdrant', 'pinecone'). Defaults to DEFAULT_VECTORE_STORE_TYPE.

    Returns:
        None

    Note:
        Ensure that the 'documents' list includes all documents that should remain in the
        database, as any missing items will be deleted.
    """
    logger.info('Updating vector store')

    vector_store = AutoVectorStore.from_defaults(
        vector_store_type=vectore_store_type, index_name=DEFAULT_INDEX_NAME)

    # check_for_changes compares the input documents against the store and
    # returns the documents to (re)index plus the ids to drop.
    changed_documents, deleted_document_ids = check_for_changes(documents, vector_store)

    # Apply updates first, then remove the documents that are no longer wanted.
    vector_store.update_vectorindex(changed_documents)
    vector_store.delete_documents_by_id(deleted_document_ids)

    logger.info('Vector database successfully updated.')


def set_default_prompt_template() -> ChatPromptTemplate:
"""
Sets the default prompt template for the query engine.
Expand Down

0 comments on commit 87253fc

Please sign in to comment.