Skip to content

Commit

Permalink
reformatted
Browse files Browse the repository at this point in the history
  • Loading branch information
samanthajmichael committed Dec 11, 2024
1 parent d011110 commit aa20306
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 8 deletions.
2 changes: 1 addition & 1 deletion frontend/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
process_uploaded_tc,
read_file_content,
read_pdf_to_string,
retrieve_all_metadata,
retrieve_context_per_question,
show_context,
text_wrap,
retrieve_all_metadata
)
from .rag import (
create_documents_with_metadata,
Expand Down
18 changes: 15 additions & 3 deletions frontend/core/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ def retrieve_all_metadata(vectorstore):
titles = {doc.metadata.get("title", "Unknown") for doc in documents}
return sorted(titles)
else:
raise ValueError("Vectorstore does not have a valid 'docstore' or metadata.")
raise ValueError(
"Vectorstore does not have a valid 'docstore' or metadata."
)
except Exception as e:
raise ValueError(f"Metadata retrieval error: {e}")

Expand All @@ -274,7 +276,13 @@ def retrieve_context_per_question(question, retriever):
Returns:
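list: Sorted metadata titles when the question asks which terms and conditions are available; otherwise the page contents of the relevant documents, or ["No relevant context found."] when nothing matches.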
"""
if any(keyword in question.lower() for keyword in ["what terms and conditions do you have access to", "what companies terms and conditions do you have access to"]):
if any(
keyword in question.lower()
for keyword in [
"what terms and conditions do you have access to",
"what companies terms and conditions do you have access to",
]
):
try:
return retrieve_all_metadata(retriever.vectorstore)
except Exception as e:
Expand All @@ -283,7 +291,11 @@ def retrieve_context_per_question(question, retriever):
# Retrieve relevant context for general questions
try:
results = retriever.get_relevant_documents(question)
return [doc.page_content for doc in results] if results else ["No relevant context found."]
return (
[doc.page_content for doc in results]
if results
else ["No relevant context found."]
)
except Exception as e:
raise ValueError(f"Error retrieving context: {e}")

Expand Down
13 changes: 9 additions & 4 deletions frontend/core/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def load_metadata(metadata_path):
return json.load(f)


def create_documents_with_metadata(metadata_list, data_folder, chunk_size=1000, chunk_overlap=200):
def create_documents_with_metadata(
metadata_list, data_folder, chunk_size=1000, chunk_overlap=200
):
"""
Create documents with metadata and content chunks.
"""
Expand All @@ -58,9 +60,13 @@ def create_documents_with_metadata(metadata_list, data_folder, chunk_size=1000,
return documents


def initialize_vectorstore_with_metadata(metadata_file, data_folder, chunk_size=1000, chunk_overlap=200):
def initialize_vectorstore_with_metadata(
metadata_file, data_folder, chunk_size=1000, chunk_overlap=200
):
metadata_list = load_metadata(metadata_file)
documents = create_documents_with_metadata(metadata_list, data_folder, chunk_size, chunk_overlap)
documents = create_documents_with_metadata(
metadata_list, data_folder, chunk_size, chunk_overlap
)
embeddings = OpenAIEmbeddings()

# Ensure metadata is being printed for debugging
Expand Down Expand Up @@ -99,7 +105,6 @@ def initialize_rag(metadata_file=metadata_path, data_folder=data_dir, k=2):
raise ValueError(f"Failed to initialize RAG system: {e}")



def encode_documents(path, chunk_size=1000, chunk_overlap=200):
"""
Encodes all text files into a vector store using OpenAI embeddings and includes metadata.
Expand Down

0 comments on commit aa20306

Please sign in to comment.