Skip to content

Commit

Permalink
Merge pull request #22 from kamyabnazari/kn-vector-db-swtich-and-fixes
Browse files Browse the repository at this point in the history
Changing to qdrant db
  • Loading branch information
kamyabnazari authored Jun 27, 2023
2 parents 9cfd739 + f1e6817 commit 8c7c5ab
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 25 deletions.
23 changes: 10 additions & 13 deletions backend/chat_bot_function.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,33 @@
import os
from langchain.vectorstores.chroma import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import HumanMessage, AIMessage
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient

from dotenv import load_dotenv

load_dotenv()

def make_chain(documentId: str):
    """Build a conversational retrieval chain over one document's vectors.

    Args:
        documentId: Name of the Qdrant collection that holds the embedded
            chunks of the document to chat with.

    Returns:
        A ``ConversationalRetrievalChain`` that answers with
        ``gpt-3.5-turbo`` and also returns the source documents it
        retrieved.
    """
    model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        # Numeric zero (not the string "0") for deterministic answers.
        temperature=0,
    )

    embeddings = OpenAIEmbeddings()

    # The Qdrant endpoint is read from the environment (.env is loaded at
    # import time by load_dotenv()).
    client = QdrantClient(os.getenv('PUBLIC_QDRANT_URL'))
    qdrant = Qdrant(client, documentId, embeddings)

    # Only the Qdrant retriever is passed: the former Chroma store was
    # replaced, and supplying `retriever=` twice is a syntax error.
    return ConversationalRetrievalChain.from_llm(
        model,
        retriever=qdrant.as_retriever(),
        return_source_documents=True,
    )


def chat_bot_funtion(question: str, chat_history, documentId: str):
load_dotenv()
if not chat_history:
chat_history=[]

Expand Down
21 changes: 11 additions & 10 deletions backend/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
import PyPDF3
import re
import os
import sys
from typing import Callable, List, Tuple, Dict

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant

from qdrant_client import QdrantClient

from dotenv import load_dotenv

# Actually invoke load_dotenv so that variables from .env (such as
# PUBLIC_QDRANT_URL, read via os.getenv below) reach the process environment;
# a bare `load_dotenv` only references the function and does nothing.
load_dotenv()

def extract_metadata_from_pdf(file_path: str) -> dict:
with open(file_path, "rb") as pdf_file:
Expand Down Expand Up @@ -108,9 +110,8 @@ def text_to_docs(text: List[str], metadata: Dict[str, str]) -> List[Document]:

return doc_chunks


def create_embeddings_from_pdf_file(file_path: str, documentId: str):

# Step 1: Parse PDF
raw_pages, metadata = parse_pdf(file_path)

Expand All @@ -128,12 +129,12 @@ def create_embeddings_from_pdf_file(file_path: str, documentId: str):

# Step 3 + 4: Generate embeddings and store them in DB
embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(

client = QdrantClient(os.getenv('PUBLIC_QDRANT_URL'))
qdrant = Qdrant(client, documentId, embeddings)

qdrant.from_documents(
document_chunks,
embeddings,
collection_name=documentId,
persist_directory=os.getenv('DB_PERSIST_DIRECTORY'),
)

# Save DB locally
vector_store.persist()
)
2 changes: 1 addition & 1 deletion backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from fastapi.middleware.cors import CORSMiddleware

# Importing langchain
from langchain import LLMChain, PromptTemplate
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import HumanMessage, AIMessage
Expand Down
1 change: 0 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ pocketbase
PyPDF3
pdfkit
pytest
chromadb
pdfplumber
qdrant-client
retry
Expand Down
1 change: 1 addition & 0 deletions frontend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Create a `.env` file in the root of the frontend directory with the following va
```
PUBLIC_POCKETBASE_URL=http://localhost:8090
PUBLIC_BACKEND_URL=http://localhost:5003
PUBLIC_QDRANT_URL=http://localhost:6333
PUBLIC_SECURE=true
PUBLIC_HTTPONLY=false
PUBLIC_SAMESITE=None
Expand Down
7 changes: 7 additions & 0 deletions frontend/src/lib/components/FileTable.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import type { Record } from 'pocketbase';
import { onMount } from 'svelte';
import { getDocumentURL } from '$lib/utils';
import { env } from '$env/dynamic/public';
import axios from 'axios';
onMount(async () => {
await fetchDocuments();
Expand All @@ -28,6 +30,11 @@
async function deleteDocument(documentID: string) {
try {
await pb.collection('documents').delete(documentID);
await axios({
url: `${env.PUBLIC_QDRANT_URL}/collections/${documentID}`,
method: 'delete',
headers: { 'Content-Type': 'application/json' }
});
documentList = documentList.filter((document) => document.id !== documentID);
} catch (error) {
console.error('Fetch error:', error);
Expand Down

0 comments on commit 8c7c5ab

Please sign in to comment.