Added checks for inappropriate user prompts and system responses #59

Merged: 1 commit, Dec 5, 2024
91 changes: 90 additions & 1 deletion MinuteMate/back/main.py
@@ -2,18 +2,17 @@
import logging
from typing import Optional, List

from fastapi import FastAPI, HTTPException

Check failure on line 5 in MinuteMate/back/main.py (GitHub Actions / ruff)
Ruff (F401): MinuteMate/back/main.py:5:30: `fastapi.HTTPException` imported but unused
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.query import Rerank, MetadataQuery

Check failure on line 11 in MinuteMate/back/main.py (GitHub Actions / ruff)
Ruff (F401): MinuteMate/back/main.py:11:36: `weaviate.classes.query.Rerank` imported but unused
Ruff (F401): MinuteMate/back/main.py:11:44: `weaviate.classes.query.MetadataQuery` imported but unused

import openai

Check failure on line 13 in MinuteMate/back/main.py (GitHub Actions / ruff)
Ruff (F401): MinuteMate/back/main.py:13:8: `openai` imported but unused
from openai import OpenAI


from rake_nltk import Rake
from dotenv import load_dotenv
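
The Ruff F401 failures above flag four unused imports. A minimal sketch of the import block after that cleanup, assuming none of `HTTPException`, `Rerank`, `MetadataQuery`, or the bare `openai` module is used elsewhere in the file:

import logging
from typing import Optional, List

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

import weaviate
from weaviate.classes.init import Auth

from openai import OpenAI

from rake_nltk import Rake
from dotenv import load_dotenv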

@@ -195,13 +194,98 @@
                ]
            )
            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"OpenAI generation error: {e}")
            return "I'm sorry, but I couldn't generate a response."

    def check_prompt(self, prompt: str) -> str:
        """Check prompt appropriateness using OpenAI"""

        try:
            response = self.openai_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": """A local government hosts a chat system that uses retrieval-augmented generation
                        to improve public access to the contents of its public meetings. The system has access to
                        meeting agendas, minutes, and transcriptions.

                        Your role is to determine whether prompts provided by users of this system are appropriate.
                        It's very important that users be able to get responses to reasonable requests, but toxic,
                        abusive, or illegal requests should be identified.

                        Requests seeking information that is accurate and politically relevant are appropriate,
                        even if the information sought is embarrassing to the government or individuals or includes
                        references to abusive, illegal, or controversial actions or ideas.

                        The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
                        The rest of your response provides the top three to five concise factors that explain this decision."""
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            )
            verdict = response.choices[0].message.content
            # The system prompt requests a lowercase verdict as the first word;
            # normalize case defensively before trusting it.
            if verdict.split(maxsplit=1)[0].lower() in {'appropriate', 'inappropriate', 'ambiguous'}:
                return verdict
            else:
                return 'error generating prompt check'

        except Exception as e:
            logger.error(f"OpenAI generation error: {e}")
            return "I'm sorry, but I couldn't generate a response."

    def check_response(self, response_text: str) -> str:
        """Check response appropriateness using OpenAI"""

        try:
            response = self.openai_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": """A local government hosts a chat system that uses retrieval-augmented generation
                        to improve public access to the contents of its public meetings. The system has access to
                        meeting agendas, minutes, and transcriptions.

                        Your role is to determine whether the chat system's responses to prompts are appropriate.
                        It's very important that the chat system be able to deliver reasonable responses,
                        but clearly toxic, abusive, or illegal responses should be identified.

                        Information that is accurate and politically relevant is appropriate, even if it is embarrassing
                        to the government or individuals or includes references to abusive, illegal, or controversial
                        actions or ideas.

                        The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
                        The rest of your response provides the top three to five concise factors that explain this decision."""
                    },
                    {
                        "role": "user",
                        "content": response_text
                    }
                ]
            )
            verdict = response.choices[0].message.content
            # Same verdict convention as check_prompt; normalize case defensively.
            if verdict.split(maxsplit=1)[0].lower() in {'appropriate', 'inappropriate', 'ambiguous'}:
                return verdict
            else:
                return 'error generating response check'

        except Exception as e:
            logger.error(f"OpenAI generation error: {e}")
            return "I'm sorry, but I couldn't generate a response."

    def process_prompt(self, prompt_request: PromptRequest) -> PromptResponse:
        """Main method to process user prompt"""
        try:

            # Check the user prompt for inappropriate content
            prompt_check = self.check_prompt(prompt_request.user_prompt_text)
            if prompt_check.split(maxsplit=1)[0].lower() == 'inappropriate':
                return PromptResponse(generated_response='inappropriate prompt detected')

            # Search for relevant context
            context_segments, keywords = self.search_weaviate(prompt_request.user_prompt_text)

@@ -211,6 +295,11 @@
                context_segments
            )

            # Check the generated response for inappropriate content.
            # Note: the check must run on the generated response itself,
            # not on the original user prompt.
            response_check = self.check_response(generated_response)
            if response_check.split(maxsplit=1)[0].lower() == 'inappropriate':
                return PromptResponse(generated_response='inappropriate response detected')

            return PromptResponse(
                generated_response=generated_response,
                context_segments=context_segments,
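
A minimal sketch of the guardrail flow end to end, assuming a configured service object (here called `processor`) that owns `check_prompt`, `check_response`, and `process_prompt`; `PromptRequest`, `PromptResponse`, and the `user_prompt_text` field come from the diff above, while the prompt strings are illustrative:

# Illustrative only: exercising the moderation flow added in this PR.

ok = processor.process_prompt(
    PromptRequest(user_prompt_text="Summarize the most recent budget discussion.")
)
print(ok.generated_response)  # normal RAG answer with retrieved context

# An inappropriate prompt short-circuits before retrieval:
blocked = processor.process_prompt(
    PromptRequest(user_prompt_text="<toxic or abusive text>")
)
assert blocked.generated_response == 'inappropriate prompt detected'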