Code cleanup (#23)

* Added comments * Removed unused context files * Update embeddingService.py * Update embeddingService.py --------- Co-authored-by: Ilamparithi I <63252233+Ilamparithi-I@users.noreply.github.com>
UBC-CIC · May 2, 2024 · 4d7fc75 · 4d7fc75
1 parent 8fba2af
commit 4d7fc75
Show file tree

Hide file tree

Showing 21 changed files with 47 additions and 100 deletions.
diff --git a/Frontend/src/context/auth.context.tsx b/Frontend/src/context/auth.context.tsx
diff --git a/Frontend/src/hooks/useAuthContext.ts b/Frontend/src/hooks/useAuthContext.ts
diff --git a/Frontend/src/hooks/useCookieContext.js b/Frontend/src/hooks/useCookieContext.js
diff --git a/Frontend/src/hooks/useLogin.ts b/Frontend/src/hooks/useLogin.ts
@@ -7,8 +7,6 @@ export const useLogin = () => {
     const [error, setError] = useState('')
     const [isLoading, setIsLoading] = useState(false)
 
-    // const { dispatch } = useAuthContext()
-
     const login = async (email: string, password: string) => {
         setIsLoading(true)
         const user = new CognitoUser({

diff --git a/Frontend/src/main.tsx b/Frontend/src/main.tsx
@@ -1,7 +1,6 @@
 import React from 'react'
 import ReactDOM from 'react-dom/client'
 import App from './App.tsx'
-// import { AuthContextProvider } from './context/auth.context.tsx'
 import './index.css'
 
 ReactDOM.createRoot(document.getElementById('root')!).render(

diff --git a/Frontend/src/pages/auth/Login.tsx b/Frontend/src/pages/auth/Login.tsx
@@ -19,7 +19,7 @@ const Login = () => {
 
         if(cognitoUser){
             let userId;
-            // get user id from user
+            // Gets the user id from cognito session
             cognitoUser.getSession(function(err: Error | null, session: CognitoUserSession) {
                 if (err) {
                     alert(err.message || JSON.stringify(err));

diff --git a/Frontend/src/pages/dashboards/AdminDashboard.tsx b/Frontend/src/pages/dashboards/AdminDashboard.tsx
@@ -4,7 +4,6 @@ import { Badge } from '@/components/badge/Badge';
 import { faEllipsisV, faPlusCircle } from '@fortawesome/free-solid-svg-icons';
 import SideNav from '@/components/navbar/SideNav';
 import React, { useState, useEffect } from "react";
-// import { useParams } from "react-router-dom"
 
 import {
     Book,
@@ -47,7 +46,6 @@ type CourseData = {
 }
 
 const AdminDashboard = () => {
-    // const { id } = useParams<{ id: string }>()
     const [courses, setCourses] = useState<any[]>([]);
     const [instructors, setInstructors] = useState<any[]>([]);
     const [selectedCourseCode, setSelectedCourseCode] = useState<string>('');

diff --git a/backend/business/adminService.py b/backend/business/adminService.py
@@ -4,6 +4,7 @@
 from .courseService import get_courses
 import base64
 
+# Gets the configuration of the institution
 def get_config():
     try:
         with open('core/config/logo.png', 'rb') as image_file:
@@ -21,6 +22,7 @@ def get_config():
         'colour': primary_colour
     }
 
+# Updates the configuration of the institution (logo and primary colour)
 def update_config(update_config_data):
     if 'logo' in update_config_data and update_config_data['logo'] is not None:
         logo_base64 = update_config_data['logo']
@@ -43,6 +45,7 @@ def update_config(update_config_data):
 
     return True
 
+# Updates the role of a user, can be Admin, Instructor or Student
 def update_users(update_user_data):
     user = User.query.get(update_user_data['id'])
     print("user", user)
@@ -63,6 +66,7 @@ def update_users(update_user_data):
 
     return True
 
+# Removes a user from the institution
 def delete_user(delete_user_data):
     user = User.query.get(delete_user_data['user_id'])
     if user is None:

diff --git a/backend/business/courseService.py b/backend/business/courseService.py
@@ -86,7 +86,7 @@ def join_course(join_course_data):
             return True
     return False
 
-# Uploads course document to s3
+# Uploads course document to s3, adds document to document table and adds document to vector store
 def upload_course_document(course_document_data):
     print(course_document_data)
     file = course_document_data['document']
@@ -122,32 +122,33 @@ def upload_course_document(course_document_data):
                                                   'document_id': str(document.id)}})
 
         s3_url = "s3://" + Config.BUCKET_NAME + "/" + str(course_document_data['course_id']) + "/" + file.filename
-        # add document to vector store
+
+        # Adds document to vector store
         thread = threading.Thread(target=vectorize_documents, args=(str(document.course_id), s3_url, str(document.id),))
         thread.start()
     except Exception as e:
         print(e)
         return False
 
-    # return file name and id
     file_data = {
         'name': new_filename,
         'id': document.id
     }
 
-    print("file_data: ", file_data)
     return file_data
 
 def delete_course_document(course_document_data):
     document = Document.query.get(course_document_data['document_id'])
     if document:
+        # Removes document from s3 and document table
         s3 = Config.SESSION.client('s3')
         s3_url = "s3://" + Config.BUCKET_NAME + "/" + str(document.course_id) + "/" + document.name
         s3.delete_object(Bucket=Config.BUCKET_NAME, Key=str(document.course_id) + "/" + document.name)
         course_id = str(document.course_id)
         db.session.delete(document)
         db.session.commit()
-        # remove from vector store
+
+        # Removes document from vector store
         thread = threading.Thread(target=delete_document_vectors, args=(course_id, s3_url))
         thread.start()
         return True
@@ -191,13 +192,16 @@ def get_courses():
     courses = Course.query.all()
     return courses
 
-# get course
+# Get course with a specific course_id
 def get_course(course_id):
     course = Course.query.get(course_id)
     return course
 
+# Vectorizes and adds document to vector store
 def vectorize_documents(course_id, s3_url, document_id):
+    # Extracts plain text from document
     documents = extractionService.extract_text(s3_url, document_id)
+
     vectordb = Chroma(
     client=vecdb,
     collection_name=course_id,

diff --git a/backend/business/embeddingService.py b/backend/business/embeddingService.py
@@ -3,9 +3,9 @@
 from langchain_community.embeddings import SagemakerEndpointEmbeddings
 from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
 from ..config import Config
-# profile_name, embedding_endpoint_name, region_name, environment
-# embedding model object
-# ref:
+
+# Embedding model object
+# ref: https://python.langchain.com/docs/integrations/llms/sagemaker/
 class ContentHandler(EmbeddingsContentHandler):
     content_type = "application/json"
     accepts = "application/json"
@@ -20,15 +20,14 @@ def transform_output(self, output: bytes) -> List[List[float]]:
 
 content_handler = ContentHandler()
 
-
-# Conditionally include credentials_profile_name based on profile_name
 embedding_args = {
     "endpoint_name": Config.EMBEDDING_ENDPOINT_NAME,
     "region_name": Config.REGION_NAME,
     "content_handler": content_handler,
     "endpoint_kwargs": {"CustomAttributes": "accept_eula=true"}
 }
 
+# Conditionally include credentials_profile_name based on environment
 if Config.ENVIRONMENT != 'production':
     embedding_args["credentials_profile_name"] = Config.PROFILE_NAME
 
@@ -51,7 +50,7 @@ def transform_output(self, output: bytes) -> List[List[float]]:
 query_content_handler = QueryContentHandler()
 
 
-# Conditionally include credentials_profile_name based on profile_name
+# Conditionally include credentials_profile_name based on environment
 query_embedding_args = {
     "endpoint_name": Config.EMBEDDING_ENDPOINT_NAME,
     "region_name": Config.REGION_NAME,
@@ -62,4 +61,4 @@ def transform_output(self, output: bytes) -> List[List[float]]:
 if Config.ENVIRONMENT != 'production':
     embedding_args["credentials_profile_name"] = Config.PROFILE_NAME
 
-query_embedding = SagemakerEndpointEmbeddings(**query_embedding_args)
+query_embedding = SagemakerEndpointEmbeddings(**query_embedding_args)
diff --git a/backend/business/extractionService.py b/backend/business/extractionService.py
@@ -2,7 +2,7 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from ..config import Config
 
-
+# Extracts plain text from PDF stored in S3 using Textract
 def extract_text(s3_path, document_id):
     client = Config.SESSION.client("textract", region_name=Config.REGION_NAME)
     loader = AmazonTextractPDFLoader(s3_path, client=client)

diff --git a/backend/business/generationService.py b/backend/business/generationService.py
@@ -4,6 +4,7 @@
 from langchain.prompts import PromptTemplate
 from ..config import Config
 
+# ref: https://python.langchain.com/docs/integrations/llms/sagemaker/
 class ContentHandler(LLMContentHandler):
     content_type = "application/json"
     accepts = "application/json"
@@ -40,17 +41,14 @@ def transform_output(self, output: bytes) -> str:
 
 llm_open = SagemakerEndpoint(**llm_open_args)
 
-
-# llm_open = Ollama(model="mistral")
-
-template = """ <s>[INST] 
+template = """ <s>[INST]
 You are a helpful assistant that provides direct and concise answers based only on the provided information.
 Use the following information from the course information to answer the user's question. If the answer is not present in the provided information, your answer must only be 'I do not know the answer'.
-Do not refer to the fact that there are provided course documents in your answer, just directly answer the question. 
+Do not refer to the fact that there are provided course documents in your answer, just directly answer the question.
 < -- COURSE INFORMATION -- >
 {context}
 < -- END COURSE INFORMATION -- >
-< -- QUESTION -- > 
+< -- QUESTION -- >
 {question}
 < -- END QUESTION -- >
 Solution:

diff --git a/backend/business/queryService.py b/backend/business/queryService.py
@@ -16,7 +16,7 @@ def query_llm(query_data):
     user_id = query_data['user_id']
     question = question
 
-    # use chroma and sagemaker embeddings to get documents
+    # Retrieve documents using chroma and sagemaker embeddings
     vectordb = Chroma(client=vecdb, collection_name = course_id ,embedding_function=query_embedding)
     retriever = vectordb.as_retriever()
 
@@ -28,6 +28,7 @@ def query_llm(query_data):
                                   chain_type_kwargs={"prompt": prompt})
     llm_response = qa_chain(question)
 
+    # If its a new conversation, create a new conversation
     if (conversation_id is None):
         conversation = Conversation(
             name = question,
@@ -45,6 +46,8 @@ def query_llm(query_data):
     )
     db.session.add(query)
     db.session.commit()
+
+    # Get temporary url for the source documents
     s3 = Config.SESSION.client('s3')
     sources = []
     for source in llm_response["source_documents"]:
@@ -61,13 +64,15 @@ def query_llm(query_data):
 
     return response
 
+# Extracts bucket name and object name from s3 url
 def split_s3_url(source):
     parts = source.replace("s3://", "").split("/", 1)
     bucket_name = parts[0]
     object_name = parts[1]
 
     return {"bucket_name": bucket_name, "object_name": object_name}
 
+# Gets the list of questions and answers for a conversation
 def query_list(query_data):
     conversation = Conversation.query.get(query_data['conversation_id'])
     queries = sorted(conversation.queries, key=lambda query: query.date)
@@ -83,6 +88,7 @@ def query_list(query_data):
     }
     return response
 
+# Gets the list of conversations between a user and a course
 def conversation_history(query_data):
     conversations = Conversation.query.filter_by(user_id=query_data['user_id'], course_id=query_data['course_id']).order_by(Conversation.date.desc()).all()
     cId = get_course(query_data['course_id'])

diff --git a/backend/business/reportService.py b/backend/business/reportService.py
@@ -6,7 +6,7 @@
 from ..extensions import db
 from .courseService import get_course
 
-
+# Reports a conversation and assigns the report to the instructor
 def report(report_data):
     conversation = Conversation.query.get(report_data['conversation_id'])
     if not conversation:
@@ -16,6 +16,7 @@ def report(report_data):
     if not course:
         return None
 
+    # Get the instructor of the course that the conversation belongs to
     instructor = User.query.join(Course.users) \
                         .filter(Course.id == conversation.course_id) \
                         .filter(User.role == Role.Instructor) \
@@ -32,7 +33,7 @@ def report(report_data):
 
     return report.id
 
-
+# Retrieves all reports for an instructor
 def list_reports(report_data):
     reports = Report.query.filter_by(instructor_id=report_data['instructor_id']).all()
 

diff --git a/backend/business/userService.py b/backend/business/userService.py
@@ -12,6 +12,8 @@ def register(create_user_data):
 
     users = get_users()
     role = Role.Student
+
+    # Makes the first user an admin
     if len(users) == 0:
         role = Role.Admin
 
@@ -45,6 +47,8 @@ def login(login_data):
 # Changes user password
 def change_password(change_password_data):
     user = User.query.get(change_password_data['user_id'])
+
+    # Checks if the password is correct
     if bcrypt.check_password_hash(user.password, change_password_data['old_password']):
         user.password = bcrypt.generate_password_hash(change_password_data['new_password']).decode('utf-8')
         db.session.commit()

diff --git a/backend/config.py b/backend/config.py
@@ -2,9 +2,8 @@
 import os
 import json
 
-
 class Config:
-    ENVIRONMENT = os.environ.get('environment') 
+    ENVIRONMENT = os.environ.get('environment')
     if ENVIRONMENT != 'production':
         PROFILE_NAME = '' # input for local testing
         EMBEDDING_ENDPOINT_NAME = '' # input for local testing
@@ -31,5 +30,5 @@ class Config:
 
         # Set the SQLALCHEMY_DATABASE_URI
         SQLALCHEMY_DATABASE_URI = 'postgresql://' + str(DBUser) + ':' + str(DBPassword) + '@' + os.environ.get('postgres_hostname') + ':' + os.environ.get('postgres_port') + '/' + os.environ.get('postgres_database_name')
-    
+