# app.py
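# Chainlit app for chatting with an uploaded document (text, HTML, PDF, or
# CSV) through a LangChain ConversationalRetrievalChain backed by a Chroma
# vector store of OpenAI embeddings. Launch with: chainlit run app.py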
import os

import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredHTMLLoader, PyPDFLoader, CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

welcome_message = """### Welcome to Chat with 👋
You can upload a file to chat with.
I support text, HTML, PDF, and CSV files; URL support is coming soon.
"""
supported_file_types = [
    "text/plain",       # Text
    "text/html",        # HTML
    "application/pdf",  # PDF
    "text/csv",         # CSV
]
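
# Both OpenAIEmbeddings and ChatOpenAI read the OPENAI_API_KEY environment
# variable, so it must be set before the app starts.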
embeddings = OpenAIEmbeddings()
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True
)
# GPT-4 answers questions; the cheaper GPT-3.5 model only condenses the chat
# history and follow-up into a standalone question for retrieval.
llm = ChatOpenAI(temperature=0, model="gpt-4")
condense_question_llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
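
# create_vectorstore turns one uploaded file into a searchable Chroma index:
# plain text is split in memory, while HTML, PDF, and CSV uploads go through
# the matching LangChain loader via a temporary file on disk.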
def create_vectorstore(file):
    # Plain text never needs to touch the filesystem: split it in memory.
    if file.type == "text/plain":
        text = file.content.decode("utf-8")
        texts = text_splitter.split_text(text)
        return Chroma.from_texts(texts, embeddings)

    # The remaining loaders read from a path, so persist the upload briefly.
    with open(file.name, "wb") as _file:
        _file.write(file.content)
    file_path = os.path.abspath(file.name)

    if file.type == "text/html":
        loader = UnstructuredHTMLLoader(file_path)
    elif file.type == "application/pdf":
        loader = PyPDFLoader(file_path)
    else:  # "text/csv" is the only other type the file prompt accepts
        loader = CSVLoader(file_path)

    try:
        documents = loader.load()
        documents = text_splitter.split_documents(documents)
        vectorstore = Chroma.from_documents(documents, embeddings)
    finally:
        # Clean up the temporary copy even if loading fails.
        if os.path.exists(file_path):
            os.remove(file_path)
    return vectorstore
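
# Chainlit calls the function registered with @cl.langchain_factory at the
# start of each chat session and uses the returned chain to answer the
# user's subsequent messages.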
@cl.langchain_factory
def main():
    cl.Message(content=welcome_message).send()

    # Keep asking until the user picks a supported source.
    cleaned_resp = None
    valid_responses = ["file", "url"]
    while cleaned_resp not in valid_responses:
        resp = cl.AskUserMessage(
            content="What would you like to chat with, a File or a URL?"
        ).send()
        if resp and "content" in resp:
            cleaned_resp = resp["content"].strip().lower()

    if cleaned_resp == "url":
        cl.Message(
            content="URLs are not supported yet. Please upload a file."
        ).send()
        cleaned_resp = "file"

    if cleaned_resp == "file":
        file = None
        while file is None:
            file = cl.AskFileMessage(
                content="Upload your file:",
                accept=supported_file_types,
                timeout=180,
            ).send()
        vectorstore = create_vectorstore(file)
        chain = ConversationalRetrievalChain.from_llm(
            llm,
            vectorstore.as_retriever(),
            condense_question_llm=condense_question_llm,
            memory=memory,
        )
        cl.Message(
            content=f"`{file.name}` uploaded! How can I help you?"
        ).send()
        return chain
    # TODO: build a vector store from a URL once URL loading is implemented.
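
# A possible sketch for the missing URL support (not wired in yet; the helper
# name below is hypothetical): LangChain's WebBaseLoader can fetch and parse a
# page, and the resulting documents could be split and embedded exactly like
# the file-based documents above.
#
# from langchain.document_loaders import WebBaseLoader
#
# def create_vectorstore_from_url(url):
#     documents = WebBaseLoader(url).load()
#     documents = text_splitter.split_documents(documents)
#     return Chroma.from_documents(documents, embeddings)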