From 1183362994a8bc7cf9c26cb2ce018bef30220b35 Mon Sep 17 00:00:00 2001
From: neal logan
Date: Thu, 5 Dec 2024 13:16:28 -0500
Subject: [PATCH] Added checks for inappropriate user prompts and system
 responses

---
 MinuteMate/back/main.py | 91 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/MinuteMate/back/main.py b/MinuteMate/back/main.py
index 61ee4fc6..322300b2 100644
--- a/MinuteMate/back/main.py
+++ b/MinuteMate/back/main.py
@@ -13,7 +13,6 @@
 import openai
 from openai import OpenAI
-
 from rake_nltk import Rake
 
 from dotenv import load_dotenv
 
@@ -195,6 +194,85 @@ def generate_response(self, prompt: str, context_segments: List[ContextSegment])
                 ]
             )
             return response.choices[0].message.content
+
+        except Exception as e:
+            logger.error(f"OpenAI generation error: {e}")
+            return "I'm sorry, but I couldn't generate a response."
+
+    def check_prompt(self, prompt: str) -> str:
+        """Check prompt appropriateness using OpenAI"""
+
+        try:
+            response = self.openai_client.chat.completions.create(
+                model="gpt-4o",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": """A local government hosts a chat system that uses retrieval-augmented generation
+                        to improve public access to the contents of its public meetings. The system has access to
+                        meeting agendas, minutes, and transcriptions.
+
+                        Your role is to determine whether prompts provided by users of this system are appropriate.
+                        It's very important that users be able to get answers to reasonable requests, but toxic,
+                        abusive, or illegal prompts should be identified.
+
+                        Requests seeking information that is accurate and politically relevant are appropriate,
+                        even if the information sought is embarrassing to the government or individuals or includes
+                        references to abusive, illegal, or controversial actions or ideas.
+
+                        The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
+                        The rest of your response provides the top three to five concise factors that explain this decision."""
+                    },
+                    {
+                        "role": "user",
+                        "content": prompt
+                    }
+                ]
+            )
+            if response.choices[0].message.content.split(maxsplit=1)[0] in {'appropriate', 'inappropriate', 'ambiguous'}:
+                return response.choices[0].message.content
+            else:
+                return 'error generating prompt check'
+
+        except Exception as e:
+            logger.error(f"OpenAI generation error: {e}")
+            return "I'm sorry, but I couldn't generate a response."
+
+    def check_response(self, response_text: str) -> str:
+        """Check response appropriateness using OpenAI"""
+
+        try:
+            response = self.openai_client.chat.completions.create(
+                model="gpt-4o",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": """A local government hosts a chat system that uses retrieval-augmented generation
+                        to improve public access to the contents of its public meetings. The system has access to
+                        meeting agendas, minutes, and transcriptions.
+
+                        Your role is to determine whether the chat system's responses to prompts are appropriate.
+                        It's very important that the chat system be able to deliver reasonable responses,
+                        but clearly toxic, abusive, or illegal responses should be identified.
+
+                        Information that is accurate and politically relevant is appropriate, even if it is embarrassing
+                        to the government or individuals or includes references to abusive, illegal, or controversial
+                        actions or ideas.
+
+                        The first word of your response is always 'appropriate', 'inappropriate', or 'ambiguous'.
+                        The rest of your response provides the top three to five concise factors that explain this decision."""
+                    },
+                    {
+                        "role": "user",
+                        "content": response_text
+                    }
+                ]
+            )
+            if response.choices[0].message.content.split(maxsplit=1)[0] in {'appropriate', 'inappropriate', 'ambiguous'}:
+                return response.choices[0].message.content
+            else:
+                return 'error generating response check'
+
         except Exception as e:
             logger.error(f"OpenAI generation error: {e}")
             return "I'm sorry, but I couldn't generate a response."
@@ -202,6 +280,12 @@ def generate_response(self, prompt: str, context_segments: List[ContextSegment])
     def process_prompt(self, prompt_request: PromptRequest) -> PromptResponse:
         """Main method to process user prompt"""
         try:
+
+            # Check the user prompt for inappropriate content
+            prompt_check = self.check_prompt(prompt_request.user_prompt_text)
+            if prompt_check.split(maxsplit=1)[0] == 'inappropriate':
+                return PromptResponse(generated_response='inappropriate prompt detected')
+
             # Search for relevant context
             context_segments, keywords = self.search_weaviate(prompt_request.user_prompt_text)
 
@@ -211,6 +295,11 @@ def generate_response(self, prompt: str, context_segments: List[ContextSegment])
             # Generate response
             generated_response = self.generate_response(
                 prompt_request.user_prompt_text,
                 context_segments
             )
 
+            # Check the generated response for inappropriate content
+            response_check = self.check_response(generated_response)
+            if response_check.split(maxsplit=1)[0] == 'inappropriate':
+                return PromptResponse(generated_response='inappropriate response detected')
+
             return PromptResponse(
                 generated_response=generated_response,
                 context_segments=context_segments,
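Both checks rely on a first-word contract: the classifier's reply must begin with 'appropriate', 'inappropriate', or 'ambiguous', and only that leading token drives control flow in process_prompt. A minimal, self-contained sketch of that contract for reviewers (VALID_LABELS and parse_check are illustrative names, not part of this patch; the example replies are fabricated):

    # Illustrative sketch of the first-word contract used by check_prompt /
    # check_response (not part of the patch).
    VALID_LABELS = {'appropriate', 'inappropriate', 'ambiguous'}

    def parse_check(reply: str) -> str:
        """Return the leading label, or 'error' if the reply breaks the contract."""
        words = reply.split(maxsplit=1)
        return words[0] if words and words[0] in VALID_LABELS else 'error'

    assert parse_check("inappropriate 1. targeted abuse 2. doxxing") == 'inappropriate'
    assert parse_check("appropriate 1. politically relevant request") == 'appropriate'
    assert parse_check("Inappropriate ...") == 'error'  # comparison is case-sensitive
    assert parse_check("") == 'error'                   # empty reply fails closed

As in the sketch, the inline checks compare the leading token case-sensitively, so a reply like 'Inappropriate ...' falls through to the error string (an empty reply raises inside the try block and returns the fallback message). Note that process_prompt then fails open: any checker result that does not begin with 'inappropriate', including those error and fallback strings, lets the request proceed.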