From 4d7fc75d288723569e1634dfcb6d58e01418b24b Mon Sep 17 00:00:00 2001 From: Daniel Johnson <69827271+danielrjc@users.noreply.github.com> Date: Wed, 1 May 2024 23:21:34 -0400 Subject: [PATCH] Code cleanup (#23) * Added comments * Removed unused context files * Update embeddingService.py * Update embeddingService.py --------- Co-authored-by: Ilamparithi I <63252233+Ilamparithi-I@users.noreply.github.com> --- Frontend/src/context/auth.context.tsx | 33 ------------------- Frontend/src/hooks/useAuthContext.ts | 12 ------- Frontend/src/hooks/useCookieContext.js | 12 ------- Frontend/src/hooks/useLogin.ts | 2 -- Frontend/src/main.tsx | 1 - Frontend/src/pages/auth/Login.tsx | 2 +- .../src/pages/dashboards/AdminDashboard.tsx | 2 -- backend/business/adminService.py | 4 +++ backend/business/courseService.py | 16 +++++---- backend/business/embeddingService.py | 13 ++++---- backend/business/extractionService.py | 2 +- backend/business/generationService.py | 10 +++--- backend/business/queryService.py | 8 ++++- backend/business/reportService.py | 5 +-- backend/business/userService.py | 4 +++ backend/config.py | 5 ++- backend/data/models/report.py | 2 +- backend/extensions.py | 1 - backend/web/courseController.py | 6 ++-- backend/web/queryController.py | 3 -- backend/web/userController.py | 4 +-- 21 files changed, 47 insertions(+), 100 deletions(-) delete mode 100644 Frontend/src/context/auth.context.tsx delete mode 100644 Frontend/src/hooks/useAuthContext.ts delete mode 100644 Frontend/src/hooks/useCookieContext.js diff --git a/Frontend/src/context/auth.context.tsx b/Frontend/src/context/auth.context.tsx deleted file mode 100644 index 7dec5d2..0000000 --- a/Frontend/src/context/auth.context.tsx +++ /dev/null @@ -1,33 +0,0 @@ -import { createContext, useReducer } from 'react' - -export const AuthContext = createContext(null) - -export const authReducer = (state: any, action: any) => { - switch (action.type) { - case 'LOGIN': - return { - user: action.payload - } - case 'LOGOUT': - return { - user: null - } - default: - return state - } - -} - -export const AuthContextProvider = ( children: any ) => { - - const [state, dispatch] = useReducer(authReducer, { - user: null - }) - - return ( - - {children} - - ) - -} \ No newline at end of file diff --git a/Frontend/src/hooks/useAuthContext.ts b/Frontend/src/hooks/useAuthContext.ts deleted file mode 100644 index cd9470b..0000000 --- a/Frontend/src/hooks/useAuthContext.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { useContext } from "react"; -import { AuthContext } from "../context/auth.context.js"; - -export const useAuthContext = () => { - const context = useContext(AuthContext); - - if (!context) { - throw Error('useAuthContext must be used in a AuthContextProvider') - } - - return context -} \ No newline at end of file diff --git a/Frontend/src/hooks/useCookieContext.js b/Frontend/src/hooks/useCookieContext.js deleted file mode 100644 index 10b7e8b..0000000 --- a/Frontend/src/hooks/useCookieContext.js +++ /dev/null @@ -1,12 +0,0 @@ -import { useContext } from 'react' -import { CookieContext } from '../context/cookie.context' - -export const useCookieContext = () => { - const context = useContext(CookieContext); - - if (!context) { - throw Error('useCookieContext must be used in a CookieContextProvider') - } - - return context -} \ No newline at end of file diff --git a/Frontend/src/hooks/useLogin.ts b/Frontend/src/hooks/useLogin.ts index d4dfe7f..8fd29cf 100644 --- a/Frontend/src/hooks/useLogin.ts +++ b/Frontend/src/hooks/useLogin.ts @@ -7,8 +7,6 @@ export const useLogin = () => { const [error, setError] = useState('') const [isLoading, setIsLoading] = useState(false) - // const { dispatch } = useAuthContext() - const login = async (email: string, password: string) => { setIsLoading(true) const user = new CognitoUser({ diff --git a/Frontend/src/main.tsx b/Frontend/src/main.tsx index 52e256d..f6d9b25 100644 --- a/Frontend/src/main.tsx +++ b/Frontend/src/main.tsx @@ -1,7 +1,6 @@ import React from 'react' import ReactDOM from 'react-dom/client' import App from './App.tsx' -// import { AuthContextProvider } from './context/auth.context.tsx' import './index.css' ReactDOM.createRoot(document.getElementById('root')!).render( diff --git a/Frontend/src/pages/auth/Login.tsx b/Frontend/src/pages/auth/Login.tsx index e1e3a05..a19de62 100644 --- a/Frontend/src/pages/auth/Login.tsx +++ b/Frontend/src/pages/auth/Login.tsx @@ -19,7 +19,7 @@ const Login = () => { if(cognitoUser){ let userId; - // get user id from user + // Gets the user id from cognito session cognitoUser.getSession(function(err: Error | null, session: CognitoUserSession) { if (err) { alert(err.message || JSON.stringify(err)); diff --git a/Frontend/src/pages/dashboards/AdminDashboard.tsx b/Frontend/src/pages/dashboards/AdminDashboard.tsx index ee75845..163a2e0 100644 --- a/Frontend/src/pages/dashboards/AdminDashboard.tsx +++ b/Frontend/src/pages/dashboards/AdminDashboard.tsx @@ -4,7 +4,6 @@ import { Badge } from '@/components/badge/Badge'; import { faEllipsisV, faPlusCircle } from '@fortawesome/free-solid-svg-icons'; import SideNav from '@/components/navbar/SideNav'; import React, { useState, useEffect } from "react"; -// import { useParams } from "react-router-dom" import { Book, @@ -47,7 +46,6 @@ type CourseData = { } const AdminDashboard = () => { - // const { id } = useParams<{ id: string }>() const [courses, setCourses] = useState([]); const [instructors, setInstructors] = useState([]); const [selectedCourseCode, setSelectedCourseCode] = useState(''); diff --git a/backend/business/adminService.py b/backend/business/adminService.py index 8982529..ff025fa 100644 --- a/backend/business/adminService.py +++ b/backend/business/adminService.py @@ -4,6 +4,7 @@ from .courseService import get_courses import base64 +# Gets the configuration of the institution def get_config(): try: with open('core/config/logo.png', 'rb') as image_file: @@ -21,6 +22,7 @@ def get_config(): 'colour': primary_colour } +# Updates the configuration of the institution (logo and primary colour) def update_config(update_config_data): if 'logo' in update_config_data and update_config_data['logo'] is not None: logo_base64 = update_config_data['logo'] @@ -43,6 +45,7 @@ def update_config(update_config_data): return True +# Updates the role of a user, can be Admin, Instructor or Student def update_users(update_user_data): user = User.query.get(update_user_data['id']) print("user", user) @@ -63,6 +66,7 @@ def update_users(update_user_data): return True +# Removes a user from the institution def delete_user(delete_user_data): user = User.query.get(delete_user_data['user_id']) if user is None: diff --git a/backend/business/courseService.py b/backend/business/courseService.py index ee6cd87..2367c17 100644 --- a/backend/business/courseService.py +++ b/backend/business/courseService.py @@ -86,7 +86,7 @@ def join_course(join_course_data): return True return False -# Uploads course document to s3 +# Uploads course document to s3, adds document to document table and adds document to vector store def upload_course_document(course_document_data): print(course_document_data) file = course_document_data['document'] @@ -122,32 +122,33 @@ def upload_course_document(course_document_data): 'document_id': str(document.id)}}) s3_url = "s3://" + Config.BUCKET_NAME + "/" + str(course_document_data['course_id']) + "/" + file.filename - # add document to vector store + + # Adds document to vector store thread = threading.Thread(target=vectorize_documents, args=(str(document.course_id), s3_url, str(document.id),)) thread.start() except Exception as e: print(e) return False - # return file name and id file_data = { 'name': new_filename, 'id': document.id } - print("file_data: ", file_data) return file_data def delete_course_document(course_document_data): document = Document.query.get(course_document_data['document_id']) if document: + # Removes document from s3 and document table s3 = Config.SESSION.client('s3') s3_url = "s3://" + Config.BUCKET_NAME + "/" + str(document.course_id) + "/" + document.name s3.delete_object(Bucket=Config.BUCKET_NAME, Key=str(document.course_id) + "/" + document.name) course_id = str(document.course_id) db.session.delete(document) db.session.commit() - # remove from vector store + + # Removes document from vector store thread = threading.Thread(target=delete_document_vectors, args=(course_id, s3_url)) thread.start() return True @@ -191,13 +192,16 @@ def get_courses(): courses = Course.query.all() return courses -# get course +# Get course with a specific course_id def get_course(course_id): course = Course.query.get(course_id) return course +# Vectorizes and adds document to vector store def vectorize_documents(course_id, s3_url, document_id): + # Extracts plain text from document documents = extractionService.extract_text(s3_url, document_id) + vectordb = Chroma( client=vecdb, collection_name=course_id, diff --git a/backend/business/embeddingService.py b/backend/business/embeddingService.py index 0111e5a..b72ac15 100644 --- a/backend/business/embeddingService.py +++ b/backend/business/embeddingService.py @@ -3,9 +3,9 @@ from langchain_community.embeddings import SagemakerEndpointEmbeddings from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler from ..config import Config -# profile_name, embedding_endpoint_name, region_name, environment -# embedding model object -# ref: + +# Embedding model object +# ref: https://python.langchain.com/docs/integrations/llms/sagemaker/ class ContentHandler(EmbeddingsContentHandler): content_type = "application/json" accepts = "application/json" @@ -20,8 +20,6 @@ def transform_output(self, output: bytes) -> List[List[float]]: content_handler = ContentHandler() - -# Conditionally include credentials_profile_name based on profile_name embedding_args = { "endpoint_name": Config.EMBEDDING_ENDPOINT_NAME, "region_name": Config.REGION_NAME, @@ -29,6 +27,7 @@ def transform_output(self, output: bytes) -> List[List[float]]: "endpoint_kwargs": {"CustomAttributes": "accept_eula=true"} } +# Conditionally include credentials_profile_name based on environment if Config.ENVIRONMENT != 'production': embedding_args["credentials_profile_name"] = Config.PROFILE_NAME @@ -51,7 +50,7 @@ def transform_output(self, output: bytes) -> List[List[float]]: query_content_handler = QueryContentHandler() -# Conditionally include credentials_profile_name based on profile_name +# Conditionally include credentials_profile_name based on environment query_embedding_args = { "endpoint_name": Config.EMBEDDING_ENDPOINT_NAME, "region_name": Config.REGION_NAME, @@ -62,4 +61,4 @@ def transform_output(self, output: bytes) -> List[List[float]]: if Config.ENVIRONMENT != 'production': embedding_args["credentials_profile_name"] = Config.PROFILE_NAME -query_embedding = SagemakerEndpointEmbeddings(**query_embedding_args) \ No newline at end of file +query_embedding = SagemakerEndpointEmbeddings(**query_embedding_args) diff --git a/backend/business/extractionService.py b/backend/business/extractionService.py index a9d7aef..9fd1158 100644 --- a/backend/business/extractionService.py +++ b/backend/business/extractionService.py @@ -2,7 +2,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from ..config import Config - +# Extracts plain text from PDF stored in S3 using Textract def extract_text(s3_path, document_id): client = Config.SESSION.client("textract", region_name=Config.REGION_NAME) loader = AmazonTextractPDFLoader(s3_path, client=client) diff --git a/backend/business/generationService.py b/backend/business/generationService.py index 1c04254..214e54a 100644 --- a/backend/business/generationService.py +++ b/backend/business/generationService.py @@ -4,6 +4,7 @@ from langchain.prompts import PromptTemplate from ..config import Config +# ref: https://python.langchain.com/docs/integrations/llms/sagemaker/ class ContentHandler(LLMContentHandler): content_type = "application/json" accepts = "application/json" @@ -40,17 +41,14 @@ def transform_output(self, output: bytes) -> str: llm_open = SagemakerEndpoint(**llm_open_args) - -# llm_open = Ollama(model="mistral") - -template = """ [INST] +template = """ [INST] You are a helpful assistant that provides direct and concise answers based only on the provided information. Use the following information from the course information to answer the user's question. If the answer is not present in the provided information, your answer must only be 'I do not know the answer'. -Do not refer to the fact that there are provided course documents in your answer, just directly answer the question. +Do not refer to the fact that there are provided course documents in your answer, just directly answer the question. < -- COURSE INFORMATION -- > {context} < -- END COURSE INFORMATION -- > -< -- QUESTION -- > +< -- QUESTION -- > {question} < -- END QUESTION -- > Solution: diff --git a/backend/business/queryService.py b/backend/business/queryService.py index 1b6666f..e93b270 100644 --- a/backend/business/queryService.py +++ b/backend/business/queryService.py @@ -16,7 +16,7 @@ def query_llm(query_data): user_id = query_data['user_id'] question = question - # use chroma and sagemaker embeddings to get documents + # Retrieve documents using chroma and sagemaker embeddings vectordb = Chroma(client=vecdb, collection_name = course_id ,embedding_function=query_embedding) retriever = vectordb.as_retriever() @@ -28,6 +28,7 @@ def query_llm(query_data): chain_type_kwargs={"prompt": prompt}) llm_response = qa_chain(question) + # If its a new conversation, create a new conversation if (conversation_id is None): conversation = Conversation( name = question, @@ -45,6 +46,8 @@ def query_llm(query_data): ) db.session.add(query) db.session.commit() + + # Get temporary url for the source documents s3 = Config.SESSION.client('s3') sources = [] for source in llm_response["source_documents"]: @@ -61,6 +64,7 @@ def query_llm(query_data): return response +# Extracts bucket name and object name from s3 url def split_s3_url(source): parts = source.replace("s3://", "").split("/", 1) bucket_name = parts[0] @@ -68,6 +72,7 @@ def split_s3_url(source): return {"bucket_name": bucket_name, "object_name": object_name} +# Gets the list of questions and answers for a conversation def query_list(query_data): conversation = Conversation.query.get(query_data['conversation_id']) queries = sorted(conversation.queries, key=lambda query: query.date) @@ -83,6 +88,7 @@ def query_list(query_data): } return response +# Gets the list of conversations between a user and a course def conversation_history(query_data): conversations = Conversation.query.filter_by(user_id=query_data['user_id'], course_id=query_data['course_id']).order_by(Conversation.date.desc()).all() cId = get_course(query_data['course_id']) diff --git a/backend/business/reportService.py b/backend/business/reportService.py index 09dcd8a..946515f 100644 --- a/backend/business/reportService.py +++ b/backend/business/reportService.py @@ -6,7 +6,7 @@ from ..extensions import db from .courseService import get_course - +# Reports a conversation and assigns the report to the instructor def report(report_data): conversation = Conversation.query.get(report_data['conversation_id']) if not conversation: @@ -16,6 +16,7 @@ def report(report_data): if not course: return None + # Get the instructor of the course that the conversation belongs to instructor = User.query.join(Course.users) \ .filter(Course.id == conversation.course_id) \ .filter(User.role == Role.Instructor) \ @@ -32,7 +33,7 @@ def report(report_data): return report.id - +# Retrieves all reports for an instructor def list_reports(report_data): reports = Report.query.filter_by(instructor_id=report_data['instructor_id']).all() diff --git a/backend/business/userService.py b/backend/business/userService.py index c04a2f9..18938cb 100644 --- a/backend/business/userService.py +++ b/backend/business/userService.py @@ -12,6 +12,8 @@ def register(create_user_data): users = get_users() role = Role.Student + + # Makes the first user an admin if len(users) == 0: role = Role.Admin @@ -45,6 +47,8 @@ def login(login_data): # Changes user password def change_password(change_password_data): user = User.query.get(change_password_data['user_id']) + + # Checks if the password is correct if bcrypt.check_password_hash(user.password, change_password_data['old_password']): user.password = bcrypt.generate_password_hash(change_password_data['new_password']).decode('utf-8') db.session.commit() diff --git a/backend/config.py b/backend/config.py index f6b502b..1a7d166 100644 --- a/backend/config.py +++ b/backend/config.py @@ -2,9 +2,8 @@ import os import json - class Config: - ENVIRONMENT = os.environ.get('environment') + ENVIRONMENT = os.environ.get('environment') if ENVIRONMENT != 'production': PROFILE_NAME = '' # input for local testing EMBEDDING_ENDPOINT_NAME = '' # input for local testing @@ -31,5 +30,5 @@ class Config: # Set the SQLALCHEMY_DATABASE_URI SQLALCHEMY_DATABASE_URI = 'postgresql://' + str(DBUser) + ':' + str(DBPassword) + '@' + os.environ.get('postgres_hostname') + ':' + os.environ.get('postgres_port') + '/' + os.environ.get('postgres_database_name') - + diff --git a/backend/data/models/report.py b/backend/data/models/report.py index 9f4109d..ab40a37 100644 --- a/backend/data/models/report.py +++ b/backend/data/models/report.py @@ -10,4 +10,4 @@ class Report(db.Model): reason = Column(Text, nullable=False) conversation_id = Column(UUID(as_uuid=True), ForeignKey('conversations.id'), nullable=False) instructor_id = Column(UUID(as_uuid=True), ForeignKey('users.id'), nullable=False) - + diff --git a/backend/extensions.py b/backend/extensions.py index a1407dc..adbf417 100644 --- a/backend/extensions.py +++ b/backend/extensions.py @@ -3,7 +3,6 @@ from flask_bcrypt import Bcrypt import chromadb - db = SQLAlchemy() login_manager = LoginManager() bcrypt = Bcrypt() diff --git a/backend/web/courseController.py b/backend/web/courseController.py index f4ac017..1138cd2 100644 --- a/backend/web/courseController.py +++ b/backend/web/courseController.py @@ -79,7 +79,7 @@ def delete_course_document(courseId, documentId): return jsonify({'error': 'Document does not exist'}), 400 return jsonify({'message': 'Document deleted'}), 200 -# get all courses +# Gets all courses @courseBp.route('', methods=['GET']) @login_required def get_courses(user): @@ -110,7 +110,7 @@ def join_course(user): except: return jsonify({'error': 'Internal Server Error'}), 500 -# get all course documents +# Gets all course documents @courseBp.route('//documents', methods=['GET']) # @login_required def get_course_documents(courseId): @@ -130,7 +130,7 @@ def list_enrolled_students(courseId): list_enrolled_students = courseService.list_enrolled_students(list_enrolled_students_data) return jsonify(list_enrolled_students), 200 -# remove student from course +# Removes student from course @courseBp.route('//users/', methods=['DELETE']) # @login_required def remove_student_from_course(courseId, student_id): diff --git a/backend/web/queryController.py b/backend/web/queryController.py index 33f5afa..872eb2f 100644 --- a/backend/web/queryController.py +++ b/backend/web/queryController.py @@ -8,7 +8,6 @@ @login_required def query_llm(user): data = request.get_json() - # TODO: Validate data query_data = { 'question': data['question'], 'course_id': data['course_id'], @@ -23,7 +22,6 @@ def query_llm(user): @queryBp.route('/conversations/', methods=['GET']) def query_list(conversationId): - # TODO: Validate data query_data = { 'conversation_id': conversationId, } @@ -36,7 +34,6 @@ def query_list(conversationId): @queryBp.route('//conversations', methods=['GET']) @login_required def conversation_history(user, courseId): - # TODO: Validate data query_data = { 'course_id': courseId, 'user_id': user.id, # add by middleware TODO diff --git a/backend/web/userController.py b/backend/web/userController.py index 7e427b0..3a89d5a 100644 --- a/backend/web/userController.py +++ b/backend/web/userController.py @@ -18,7 +18,6 @@ def register(): def login(): try: data = request.get_json() - # TODO: Validate data except Exception as e: print(f"Error parsing JSON: {str(e)}") @@ -37,7 +36,7 @@ def logout(): else: return jsonify({'error': 'Logout failed'}), 400 -# get user role +# Gets user role @userBp.route('/role', methods=['GET']) @login_required def get_role(user): @@ -52,7 +51,6 @@ def get_role(user): return jsonify({'error': 'Invalid password'}), 400 - @userBp.route('', methods=['PUT']) @login_required def change_password(user):