From aa20306eb919fad5601ecb2a9b1a0f430f0bffce Mon Sep 17 00:00:00 2001 From: samanthajmichael Date: Tue, 10 Dec 2024 22:34:19 -0500 Subject: [PATCH] reformatted --- frontend/core/__init__.py | 2 +- frontend/core/helpers.py | 18 +++++++++++++++--- frontend/core/rag.py | 13 +++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/frontend/core/__init__.py b/frontend/core/__init__.py index d739db6..f7a5388 100644 --- a/frontend/core/__init__.py +++ b/frontend/core/__init__.py @@ -9,10 +9,10 @@ process_uploaded_tc, read_file_content, read_pdf_to_string, + retrieve_all_metadata, retrieve_context_per_question, show_context, text_wrap, - retrieve_all_metadata ) from .rag import ( create_documents_with_metadata, diff --git a/frontend/core/helpers.py b/frontend/core/helpers.py index 0655d23..c68a9ba 100644 --- a/frontend/core/helpers.py +++ b/frontend/core/helpers.py @@ -258,7 +258,9 @@ def retrieve_all_metadata(vectorstore): titles = {doc.metadata.get("title", "Unknown") for doc in documents} return sorted(titles) else: - raise ValueError("Vectorstore does not have a valid 'docstore' or metadata.") + raise ValueError( + "Vectorstore does not have a valid 'docstore' or metadata." + ) except Exception as e: raise ValueError(f"Metadata retrieval error: {e}") @@ -274,7 +276,13 @@ def retrieve_context_per_question(question, retriever): Returns: list: A list of metadata titles if the query is about terms, or context otherwise. """ - if any(keyword in question.lower() for keyword in ["what terms and conditions do you have access to", "what companies terms and conditions do you have access to"]): + if any( + keyword in question.lower() + for keyword in [ + "what terms and conditions do you have access to", + "what companies terms and conditions do you have access to", + ] + ): try: return retrieve_all_metadata(retriever.vectorstore) except Exception as e: @@ -283,7 +291,11 @@ def retrieve_context_per_question(question, retriever): # Retrieve relevant context for general questions try: results = retriever.get_relevant_documents(question) - return [doc.page_content for doc in results] if results else ["No relevant context found."] + return ( + [doc.page_content for doc in results] + if results + else ["No relevant context found."] + ) except Exception as e: raise ValueError(f"Error retrieving context: {e}") diff --git a/frontend/core/rag.py b/frontend/core/rag.py index d5bd4e6..2f91c73 100644 --- a/frontend/core/rag.py +++ b/frontend/core/rag.py @@ -33,7 +33,9 @@ def load_metadata(metadata_path): return json.load(f) -def create_documents_with_metadata(metadata_list, data_folder, chunk_size=1000, chunk_overlap=200): +def create_documents_with_metadata( + metadata_list, data_folder, chunk_size=1000, chunk_overlap=200 +): """ Create documents with metadata and content chunks. """ @@ -58,9 +60,13 @@ def create_documents_with_metadata(metadata_list, data_folder, chunk_size=1000, return documents -def initialize_vectorstore_with_metadata(metadata_file, data_folder, chunk_size=1000, chunk_overlap=200): +def initialize_vectorstore_with_metadata( + metadata_file, data_folder, chunk_size=1000, chunk_overlap=200 +): metadata_list = load_metadata(metadata_file) - documents = create_documents_with_metadata(metadata_list, data_folder, chunk_size, chunk_overlap) + documents = create_documents_with_metadata( + metadata_list, data_folder, chunk_size, chunk_overlap + ) embeddings = OpenAIEmbeddings() # Ensure metadata is being printed for debugging @@ -99,7 +105,6 @@ def initialize_rag(metadata_file=metadata_path, data_folder=data_dir, k=2): raise ValueError(f"Failed to initialize RAG system: {e}") - def encode_documents(path, chunk_size=1000, chunk_overlap=200): """ Encodes all text files into a vector store using OpenAI embeddings and includes metadata.