-
Notifications
You must be signed in to change notification settings - Fork 5
/
test-query-1.py
40 lines (31 loc) · 1.34 KB
/
test-query-1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#test-query-1.py
import os
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
# Load settings from a .env file, then read the three variables this script
# expects from the process environment. Each lookup uses os.environ[...], so a
# missing variable stops the script with a KeyError naming it.
load_dotenv(verbose=True)
document_dir, vectorstore_dir, embeddings_model = (
    os.environ[name]
    for name in ('DOCUMENT_DIR', 'VECTOR_DB_DIR', 'MODEL_EMBEDDINGS')
)
### WORKAROUND for "trust_remote_code=True is required error" in HuggingFaceEmbeddings()
# The model is loaded once through transformers with trust_remote_code=True
# before HuggingFaceEmbeddings() is constructed; the returned object is not
# used again in this part of the script.
# CAUTION: trust_remote_code=True allows Python code shipped with the model
# repository to run on this machine - only point MODEL_EMBEDDINGS at a model
# you trust.
from transformers import AutoModel
model = AutoModel.from_pretrained(embeddings_model, trust_remote_code=True)

# Nothing is embedded or stored here: the embedding model is loaded and the
# vector store directory is opened, so the status line says exactly that.
print('*** Loading embeddings model and opening vector store')
embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_model,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True},
)

# The store directory is "<VECTOR_DB_DIR>_300_0".
# NOTE(review): the "_300_0" suffix presumably encodes the chunk size and
# overlap used when the store was built - confirm against the ingest script.
vectorstore_dir = f'{vectorstore_dir}_300_0'
vectorstore = Chroma(persist_directory=vectorstore_dir, embedding_function=embeddings)
# Interactive retrieval loop: read a question from stdin, ask the vector store
# for up to 10 similar documents, and print each document's text.
# The loop ends cleanly on EOF (Ctrl-D / closed pipe) or Ctrl-C at the prompt.
while True:
    print('Question:', end='')
    try:
        query = input()
    except (EOFError, KeyboardInterrupt):
        # No more input, or the user interrupted: finish the prompt line and
        # leave the loop instead of dying with a traceback.
        print()
        break

    if not query.strip():
        # Nothing to search for; ask again.
        continue

    # similarity_search() embeds the query itself via the embedding function
    # the store was constructed with, so a separate embed_query() call would
    # only repeat the same (expensive) work.
    documents = vectorstore.similarity_search(query, k=10)

    print(f'Number of documents: {len(documents)}')
    for document in documents:
        print(f'Document contents: {document.page_content}')
        print('-'*60)
    print('\n'*2)