This repository has been archived by the owner on Jul 12, 2024. It is now read-only.

Commit

Merge pull request #4 from loyal812/feat/chatting-class
feat: chatting class
loyal812 committed Mar 26, 2024
2 parents c6ecb8f + 1390187 commit 4086c45
Showing 22 changed files with 125 additions and 126 deletions.
4 changes: 3 additions & 1 deletion requirements.txt
@@ -17,4 +17,6 @@ llama_index==0.9.15
pypdf
spacy
ragas
-datasets
+datasets
+docx2txt
+cryptography
66 changes: 66 additions & 0 deletions src/chatting/ChattingClass.py
@@ -0,0 +1,66 @@
import openai
import os
import argparse
from dotenv import load_dotenv
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms import OpenAI

class ChattingClass:
    def __init__(self, model_id, data_path, api_key="", temperature=0.3):
        self.model_id = model_id
        self.data_path = data_path
        self.temperature = temperature
        self.set_api_key(api_key)
        self.set_document(data_path)

    def set_api_key(self, api_key):
        if api_key:
            self.api_key = api_key
        else:
            load_dotenv()
            self.api_key = os.getenv("OPENAI_API_KEY")

        if self.api_key is not None:
            os.environ["OPENAI_API_KEY"] = self.api_key
            openai.api_key = self.api_key
            return True
        else:
            # Handle the absence of the environment variable
            # You might want to log an error, raise an exception, or provide a default value
            # For example, setting a default value
            os.environ["OPENAI_API_KEY"] = "your_default_api_key"
            openai.api_key = "openai_api_key"
            return False

    def set_document(self, data_path):
        self.documents = SimpleDirectoryReader(
            data_path
        ).load_data()

    def ask_question(self, question):
        ft_context = ServiceContext.from_defaults(
            llm=OpenAI(model=self.model_id, temperature=self.temperature),
            context_window=2048
        )

        index = VectorStoreIndex.from_documents(self.documents, service_context=ft_context)
        query_engine = index.as_query_engine(service_context=ft_context)

        response = query_engine.query(question)
        return response

# Using argparse to get the question input from the user
parser = argparse.ArgumentParser(description='Chatting Engine')
# parser.add_argument('api_key', type=str, help='Openai API Key')
parser.add_argument('data_path', type=str, help='Data Path')
parser.add_argument('model_id', type=str, help='model')
parser.add_argument('question', type=str, help='question')
# parser.add_argument('temperature', type=str, help='temperature')
args = parser.parse_args()

# model_id = 'ft:gpt-3.5-turbo-0613:personal::8XaasBXv'
# data_path = "./src/test/regression/regression_test003"
chatbot = ChattingClass(model_id=args.model_id, data_path=args.data_path)

response = chatbot.ask_question(args.question)
print(response)
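
The block above wires ChattingClass to a small CLI. The class can also be driven from other Python code; the sketch below is a hypothetical usage example, not part of this commit. It assumes the argparse section is moved under an if __name__ == "__main__": guard so the module can be imported (as committed, the CLI code runs at import time), and it uses placeholder model and path values.

# Hypothetical usage sketch; assumes the CLI code above is guarded by
# if __name__ == "__main__" so importing the module does not parse arguments.
from src.chatting.ChattingClass import ChattingClass

chatbot = ChattingClass(
    model_id="ft:gpt-3.5-turbo-0613:personal::example",  # placeholder fine-tuned model id
    data_path="./src/test/regression/regression_test003",  # placeholder document folder
    temperature=0.3,
)

answer = chatbot.ask_question("What is the main contribution of the paper?")
print(answer)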
18 changes: 11 additions & 7 deletions src/finetune/FineTuningClass.py
@@ -26,6 +26,7 @@ def __init__(self, data_path, api_key='', model='gpt-3.5-turbo', temperature=0.3
        self.retry_delay = 60
        self.set_api_key(api_key)
        self.set_document(data_path)
+        self.generate_subfolder(data_path)

    def set_api_key(self, api_key):
        if api_key:
@@ -52,6 +53,10 @@ def set_document(self, data_path):
            data_path
        ).load_data()

+    def generate_subfolder(self, data_path):
+        subfolder_name = "generated_data"
+        subfolder_path = os.path.join(data_path, subfolder_name)
+        os.makedirs(subfolder_path, exist_ok=True)

    def train_generation(self):
        for attempt in range(1, self.max_retries + 1):
@@ -84,8 +89,8 @@ def generate_and_save_questions(documents, output_file):
                        for question in questions:
                            f.write(question + "\n")

-                generate_and_save_questions(self.documents[:half_point], f'{self.data_path}/train_questions.txt')
-                generate_and_save_questions(self.documents[half_point:], f'{self.data_path}/eval_questions.txt')
+                generate_and_save_questions(self.documents[:half_point], f'{self.data_path}/generated_data/train_questions.txt')
+                generate_and_save_questions(self.documents[half_point:], f'{self.data_path}/generated_data/eval_questions.txt')

                break
            except Exception as e:
@@ -140,7 +145,7 @@ def jsonl_generation(self):
        )

        questions = []
-        with open(f'{self.data_path}/train_questions.txt', "r") as f:
+        with open(f'{self.data_path}/generated_data/train_questions.txt', "r") as f:
            for line in f:
                questions.append(line.strip())

@@ -155,12 +160,11 @@
            # Handle the exception here, you might want to log the error or take appropriate action
            print(f"An error occurred: {e}")
        finally:
-            if 'finetuning_handler' in locals() and 'data_path' in locals():
-                finetuning_handler.save_finetuning_events(f'{self.data_path}/finetuning_events.jsonl')
+            finetuning_handler.save_finetuning_events(f'{self.data_path}/generated_data/finetuning_events.jsonl')


    def finetune(self):
-        file_upload = openai.files.create(file=open(f'{self.data_path}/finetuning_events.jsonl', "rb"), purpose="fine-tune")
+        file_upload = openai.files.create(file=open(f'{self.data_path}/generated_data/finetuning_events.jsonl', "rb"), purpose="fine-tune")
        print("Uploaded file id", file_upload.id)

        while True:
@@ -182,7 +186,7 @@ def finetune(self):
print("Fine-tuned model info", job_handle)
print("Model id", job_handle.fine_tuned_model)

with open(f'{self.data_path}/model.txt', "w") as f:
with open(f'{self.data_path}/generated_data/model.txt', "w") as f:
f.write(job_handle.fine_tuned_model + "\n")
break
time.sleep(3)
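
Taken together, the FineTuningClass changes above route every generated artifact (train/eval questions, the fine-tuning JSONL, and the resulting model id) into a generated_data subfolder created by the new generate_subfolder method. A rough end-to-end driver sketch, using only the method names visible in this diff and a placeholder data path (anything about the constructor beyond the hunk header is assumed):

# Hypothetical driver sketch based on the methods visible in this diff.
from src.finetune.FineTuningClass import FineTuningClass

ft = FineTuningClass(data_path="./src/test/regression/regression_test003")  # placeholder path

ft.train_generation()   # writes generated_data/train_questions.txt and eval_questions.txt
ft.jsonl_generation()   # writes generated_data/finetuning_events.jsonl
ft.finetune()           # uploads the JSONL, polls the job, saves the model id to generated_data/model.txt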
4 changes: 2 additions & 2 deletions src/finetune/finetune.py
@@ -18,9 +18,9 @@
    os.environ["OPENAI_API_KEY"] = "your_default_api_key"
    openai.api_key = "openai_api_key"

-data_path = "./src/test/regression/regression_test004"
+data_path = "./src/test/regression/regression_test003"

-file_upload = openai.files.create(file=open(f'{data_path}/finetuning_events.jsonl', "rb"), purpose="fine-tune")
+file_upload = openai.files.create(file=open(f'{data_path}/generated_data/finetuning_events.jsonl', "rb"), purpose="fine-tune")
print("Uploaded file id", file_upload.id)

while True:
4 changes: 2 additions & 2 deletions src/finetune/initital_eval.py
@@ -15,14 +15,14 @@
    # For example, setting a default value
    os.environ["OPENAI_API_KEY"] = "your_default_api_key"

-data_path = "./src/test/regression/regression_test004"
+data_path = "./src/test/regression/regression_test003"

documents = SimpleDirectoryReader(
    data_path
).load_data()

questions = []
-with open(f'{data_path}/eval_questions.txt', "r") as f:
+with open(f'{data_path}/generated_data/eval_questions.txt', "r") as f:
    for line in f:
        questions.append(line.strip())

7 changes: 3 additions & 4 deletions src/finetune/jsonl_generation.py
@@ -16,7 +16,7 @@
    # For example, setting a default value
    os.environ["OPENAI_API_KEY"] = "your_default_api_key"

-data_path = "./src/test/regression/regression_test004"
+data_path = "./src/test/regression/regression_test003"

documents = SimpleDirectoryReader(
    data_path
@@ -33,7 +33,7 @@
)

questions = []
-with open(f'{data_path}/train_questions.txt', "r") as f:
+with open(f'{data_path}/generated_data/train_questions.txt', "r") as f:
    for line in f:
        questions.append(line.strip())

@@ -50,5 +50,4 @@
    # Handle the exception here, you might want to log the error or take appropriate action
    print(f"An error occurred: {e}")
finally:
-    if 'finetuning_handler' in locals() and 'data_path' in locals():
-        finetuning_handler.save_finetuning_events(f'{data_path}/finetuning_events.jsonl')
+    finetuning_handler.save_finetuning_events(f'{data_path}/generated_data/finetuning_events.jsonl')
2 changes: 1 addition & 1 deletion src/finetune/run-model.py
@@ -16,7 +16,7 @@
    os.environ["OPENAI_API_KEY"] = "your_default_api_key"
    openai.api_key = "openai_api_key"

-data_path = "./src/test/regression/regression_test004"
+data_path = "./src/test/regression/regression_test003"

model_id = "ft:gpt-3.5-turbo-0613:personal::8XCvxg1X"

6 changes: 3 additions & 3 deletions src/finetune/train_generation.py
@@ -15,7 +15,7 @@
else:
    os.environ["OPENAI_API_KEY"] = "your_default_api_key"

-data_path = "./src/test/regression/regression_test004"
+data_path = "./src/test/regression/regression_test003"

max_retries = 5
retry_delay = 60 # in seconds
@@ -54,8 +54,8 @@ def generate_and_save_questions(documents, output_file):
                for question in questions:
                    f.write(question + "\n")

-        generate_and_save_questions(documents[:half_point], f'{data_path}/train_questions.txt')
-        generate_and_save_questions(documents[half_point:], f'{data_path}/eval_questions.txt')
+        generate_and_save_questions(documents[:half_point], f'{data_path}/generated_data/train_questions.txt')
+        generate_and_save_questions(documents[half_point:], f'{data_path}/generated_data/eval_questions.txt')

        break
    except Exception as e:
Binary file not shown.
Binary file not shown.
Empty file.
@@ -0,0 +1,11 @@
What is the correlation between the distance in the latent space and the MPJPE in the AI Golf swing analysis tool?
What is the title of the paper mentioned in the context?
What is the title of the paper mentioned in the context?
Question: What is the title of the paper referenced in citation [28]?
Question: Who received the B.S. degree from the School of Computer Science and Engineering, Korea University of Technology and Education, Cheonan, South Korea, in 2017?
Question: What method was used to remove the background pixels and isolate the human body in the video dataset?
What evaluation metric was used to assess the alignment precision of the network in the golf swing analysis tool?
What is the main challenge addressed in the proposed golf swing analysis tool using neural networks?
What recent advancements in technology have allowed for the development of systems to help users understand the movements of professionals in sports?
What are the main contributions of the study on the golf swing analysis tool for self-training?
What is one of the main contributions of the proposed golf swing analysis method discussed in the study?
