Skip to content

Commit

Permalink
Cleaned Repo + Updated Front Application + Metadata Searches
Browse files Browse the repository at this point in the history
- There were a lot of old, unused files, so I got rid of them. I saved some of the tests locally, but there are a lot of changes, so I will reintroduce testing once the app is a bit further along.

- Divided Front App into three Sections
1. Home Page: where you can input an OpenAI API key and also choose which municipality you want to interact with.
2. Chat Page: Added a sidebar to give back most relevant chunks along with the metadata.
3. View Documents Page: just a list of all the recorded meeting dates, plus a basic topic search; downloadable files will be added to this page in the future.

- Implemented some metadata capabilities
  • Loading branch information
RileyLePrell committed Dec 20, 2024
1 parent f281f99 commit abff657
Show file tree
Hide file tree
Showing 17 changed files with 372 additions and 1,000 deletions.
2 changes: 1 addition & 1 deletion MinuteMate/back/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
context: ./
dockerfile: Dockerfile
ports:
- 8001:8001
- 8000:8000
environment:
- WEAVIATE_ENDPOINT_URL=$WEAVIATE_ENDPOINT_URL
- WEAVIATE_API_KEY=$WEAVIATE_API_KEY
Expand Down
101 changes: 48 additions & 53 deletions MinuteMate/back/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,19 @@
else:
ssl._create_default_https_context = _create_unverified_https_context


try:
    # Fetch the NLTK corpora used downstream for tokenization and keyword
    # extraction (punkt/punkt_tab tokenizers, stopwords list).
    nltk.download('punkt')
    nltk.download('punkt_tab')
    nltk.download('stopwords')
except Exception as e:
    # Best-effort at import time: report and continue rather than crash the
    # app; any failure will resurface when keyword extraction actually runs.
    print(f"Error downloading NLTK resources: {e}")


# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

load_dotenv()


# Initialize the FastAPI app
app = FastAPI(
title="MinuteMate Prompt & Response API",
Expand All @@ -58,14 +55,15 @@
allow_headers=["*"], # Allows all headers
)

# Define request and response models
class PromptRequest(BaseModel):
user_prompt_text: str = Field(..., min_length=1, max_length=1000)

# Update ContextSegment to include a metadata dictionary
# One retrieved chunk from the Weaviate search, returned to the client
# alongside the generated answer so the UI can show supporting context.
class ContextSegment(BaseModel):
    chunk_id: int                    # numeric id of the chunk (0 if missing in the source record)
    content: str                     # raw text of the retrieved chunk
    score: Optional[float] = None    # vector distance when available; None for keyword search
    metadata: Optional[dict] = None  # Add metadata support (meeting_date, meeting_type, file_type, chunk_index, source_document)

# Request body for the prompt endpoint; Field constraints reject empty
# prompts and cap length at 1000 characters before any processing happens.
class PromptRequest(BaseModel):
    user_prompt_text: str = Field(..., min_length=1, max_length=1000)

class PromptResponse(BaseModel):
generated_response: str
Expand Down Expand Up @@ -127,15 +125,14 @@ def load_env_vars(self):
def extract_keywords(self, text: str) -> List[str]:
    """Return up to the three top-ranked RAKE key phrases found in *text*.

    Extraction failures are logged and swallowed, so callers always get a
    list back (possibly empty) and never need to handle an exception here.
    """
    try:
        extractor = Rake()
        extractor.extract_keywords_from_text(text)
        ranked_phrases = extractor.get_ranked_phrases()
        return ranked_phrases[:3]
    except Exception as e:
        logger.error(f"Keyword extraction error: {e}")
        return []

def search_weaviate(self, query: str, search_type: str = 'keyword') -> List[ContextSegment]:
def search_weaviate(self, query: str, search_type: str = 'keyword'):
"""Perform search in Weaviate database"""
try:
collection = self.weaviate_client.collections.get('MeetingDocument')
Expand All @@ -146,7 +143,6 @@ def search_weaviate(self, query: str, search_type: str = 'keyword') -> List[Cont
query=",".join(keywords),
limit=5
)
print(keywords)
elif search_type == 'vector':
embedding = self.openai_client.embeddings.create(
model='text-embedding-3-small',
Expand All @@ -160,32 +156,60 @@ def search_weaviate(self, query: str, search_type: str = 'keyword') -> List[Cont
else:
raise ValueError(f"Unsupported search type: {search_type}")

context_segments = [
# Extract metadata fields from properties
# Make sure these fields exist in your Weaviate schema and data.
context_segments = [
ContextSegment(
chunk_id=int(item.properties.get('chunk_id', 0)),
content=item.properties.get('content', ''),
score=getattr(item.metadata, 'distance', None)
score=getattr(item.metadata, 'distance', None),
metadata={
"meeting_date": item.properties.get('meeting_date', ''),
"meeting_type": item.properties.get('meeting_type', ''),
"file_type": item.properties.get('file_type', ''),
"chunk_index": item.properties.get('chunk_index', ''),
"source_document": item.properties.get('source_document', '')
}
) for item in results.objects
]

keywords = self.extract_keywords(query) # Ensure keywords are returned even for vector search
return context_segments, keywords
except Exception as e:
logger.error(f"Weaviate search error: {e}")
return []
return [], []

def generate_response(self, prompt: str, context_segments: List[ContextSegment]) -> str:
"""Generate response using OpenAI"""
context_text = "\n".join([
f"<ContextSegment{seg.chunk_id}>\n{seg.content}"
for seg in context_segments
])

# Include metadata in the context to help the LLM make more informed decisions
context_text_list = []
for seg in context_segments:
meta = seg.metadata if seg.metadata else {}
context_text_list.append(
f"<ContextSegment{seg.chunk_id}>\n"
f"Content: {seg.content}\n"
f"Meeting Date: {meta.get('meeting_date', 'N/A')}\n"
f"Meeting Type: {meta.get('meeting_type', 'N/A')}\n"
f"File Type: {meta.get('file_type', 'N/A')}\n"
f"Chunk Index: {meta.get('chunk_index', 'N/A')}\n"
f"Source Document: {meta.get('source_document', 'N/A')}\n"
)

context_text = "\n".join(context_text_list)

try:
response = self.openai_client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": f"Use this context if relevant: {context_text}"
"content": (
"You are an assistant that uses retrieved meeting data with metadata. "
"Consider the given context segments and their metadata to provide a more accurate and informed response. "
"If the metadata (like meeting_date, meeting_type, etc.) is relevant, incorporate it into your answer.\n\n"
f"Use this context if relevant:\n{context_text}"
)
},
{
"role": "user",
Expand All @@ -201,27 +225,14 @@ def generate_response(self, prompt: str, context_segments: List[ContextSegment])

def check_prompt(self, prompt: str) -> str:
"""Check prompt appropriateness using OpenAI"""

try:
response = self.openai_client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": """A local government hosts a chat system that uses retrieval-augmented generation
to improve public access to the contents of its public meetings. The system has access to
meeting agendas, minutes, and transcriptions.
Your role is to determine whether prompts provided by users of this system are appropriate.
It's very important that users be able to access reasonable to reasonable requests, but toxic,
abusive, or illegal responses should be identified.
Requests seeking information that is accurate and politically relevant are appropriate,
even if the information sought is embarassing to the government or individuals or includes
references to abusive, illegal, or controversial actions or ideas.
The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
The rest of your response provides the top three to five concise factors that explain this decision."""
to improve public access to the contents of its public meetings... (same instructions)"""
},
{
"role": "user",
Expand All @@ -240,27 +251,13 @@ def check_prompt(self, prompt: str) -> str:

def check_response(self, prompt: str) -> str:
"""Check response appropriateness using OpenAI"""

try:
response = self.openai_client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": """A local government hosts a chat system that uses retrieval-augmented generation
to improve public access to the contents of its public meetings. The system has access to
meeting agendas, minutes, and transcriptions.
Your role is to determine whether the chat system's responses to prompts are appropriate.
It's very important that the chat system be able to deliver reasonable responses,
but clearly toxic, abusive, or illegal responses should be identified.
Information that is accurate and politically relevant is appropriate, even if it is embarassing
to the government or individuals or includes references to abusive, illegal, or controversial
actions or ideas.
The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
The rest of your response provides the top three to five concise factors that explain this decision."""
"content": """A local government hosts a chat system... (same instructions)"""
},
{
"role": "user",
Expand All @@ -280,11 +277,10 @@ def check_response(self, prompt: str) -> str:
def process_prompt(self, prompt_request: PromptRequest) -> PromptResponse:
"""Main method to process user prompt"""
try:

# Check the user prompt for inappropriate content
prompt_check = self.check_prompt(prompt_request.user_prompt_text)
if prompt_check.split(maxsplit=1)[0] == 'inappropriate':
return PromptResponse(generated_response = 'inappropriate prompt detected')
return PromptResponse(generated_response='inappropriate prompt detected')

# Search for relevant context
context_segments, keywords = self.search_weaviate(prompt_request.user_prompt_text)
Expand All @@ -298,12 +294,12 @@ def process_prompt(self, prompt_request: PromptRequest) -> PromptResponse:
# Check the generated response for inappropriate content
response_check = self.check_response(prompt_request.user_prompt_text)
if response_check.split(maxsplit=1)[0] == 'inappropriate':
return PromptResponse(generated_response = 'inappropriate response detected')
return PromptResponse(generated_response='inappropriate response detected')

return PromptResponse(
generated_response=generated_response,
context_segments=context_segments,
keywords = keywords,
keywords=keywords,
error_code=0
)

Expand All @@ -323,9 +319,8 @@ async def process_prompt_endpoint(prompt_request: PromptRequest):
"""Process user prompt and return response"""
return processor.process_prompt(prompt_request)


# Cleanup on shutdown
@app.on_event("shutdown")
async def shutdown_event():
"""Close Weaviate connection on app shutdown"""
processor.weaviate_client.close()
processor.weaviate_client.close()
Loading

0 comments on commit abff657

Please sign in to comment.