Skip to content

Commit

Permalink
added steps to black.yml
Browse files Browse the repository at this point in the history
reformatted all files
  • Loading branch information
latekvo committed Apr 15, 2024
1 parent a6eceeb commit 588de15
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 26 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ jobs:
uses: cytopia/docker-black@0.8
with:
path: 'core/'
- name: Python Black (main)
uses: cytopia/docker-black@0.8
with:
path: 'main.py'
- name: Python Black (crawler)
uses: cytopia/docker-black@0.8
with:
path: 'crawler.py'
4 changes: 3 additions & 1 deletion core/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
web_news_lookup,
web_docs_lookup_prompt,
web_news_lookup_prompt,
web_wiki_lookup_prompt)
web_wiki_lookup_prompt,
)
from core.tools.dbops import get_db_by_name
from core.tools.model_loader import load_model

Expand Down Expand Up @@ -56,6 +57,7 @@ def interpret_prompt_mode():
return web_wiki_lookup_prompt()
else:
return web_docs_lookup_prompt()

web_interpret_prompt_mode = interpret_prompt_mode()
# NOTE: a detour has been performed here, more details:
# web_chain_function will soon become just a tool playing a part of a larger mechanism.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@dataclass
class EmbedderConfiguration:
supplier: Literal['ollama', 'hugging_face']
supplier: Literal["ollama", "hugging_face"]
model_name: str
model_token_limit: int
article_limit: int
Expand Down
2 changes: 1 addition & 1 deletion core/models/configuration_objects/llm_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@dataclass
class LlmConfiguration:
supplier: Literal['ollama', 'hugging_face']
supplier: Literal["ollama", "hugging_face"]
model_name: str
model_token_limit: int
model_file: Optional[str] = None
8 changes: 4 additions & 4 deletions core/models/configurations.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
from core.models.configuration_objects.llm_configuration import LlmConfiguration
from core.models.configuration_objects.embedder_configuration import EmbedderConfiguration
from core.models.configuration_objects.embedder_configuration import (
EmbedderConfiguration,
)
from terminal_gui import USE_HUGGING_FACE

llm_ollama_heavy = LlmConfiguration(
supplier="ollama",
model_name="zephyr:7b-beta-q5_K_M",
model_token_limit=4096,
model_file=""
model_file="",
)

embedder_ollama_heavy = EmbedderConfiguration(
supplier="ollama",
model_name="nomic-embed-text",
model_token_limit=4096,

    # chunk splitter options
article_limit=10,
buffer_stops=["\n\n\n", "\n\n", "\n", ". ", ", ", " ", ""],
Expand All @@ -32,7 +33,6 @@
model_name="nomic-embed-text-v1.5.Q6_K.gguf",
model_file="nomic-ai/nomic-embed-text-v1.5-GGUF",
model_token_limit=4096,

    # chunk splitter options
article_limit=10,
buffer_stops=["\n\n\n", "\n\n", "\n", ". ", ", ", " ", ""],
Expand Down
36 changes: 23 additions & 13 deletions core/tools/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def docs_to_context(docs_and_scores: List[Document], token_limit: int) -> str:
if document_index >= len(docs_and_scores):
break

print(f"{Fore.CYAN}Used {document_index + 1} snippets with a total of {token_count} tokens as context.{Fore.RESET}")
print(
f"{Fore.CYAN}Used {document_index + 1} snippets with a total of {token_count} tokens as context.{Fore.RESET}"
)
print(f"{Fore.CYAN}Context itself: {Fore.RESET}", context_text)
return context_text

Expand All @@ -49,11 +51,12 @@ def query_for_urls(query: WebQuery, url_amount=embed_config.article_limit) -> Li

url_list = search(
query=query.web_query,
stop= url_amount,
lang='en',
safe='off',
stop=url_amount,
lang="en",
safe="off",
tbs=query.web_tbs,
extra_params=query.web_extra_params)
extra_params=query.web_extra_params,
)
print(f"{Fore.CYAN}Web search completed.{Fore.RESET}")
return url_list

Expand All @@ -79,7 +82,8 @@ def populate_db_with_google_search(database: FAISS, query: WebQuery):
chunk_size=query.db_chunk_size,
chunk_overlap=embed_config.chunk_overlap,
keep_separator=False,
strip_whitespace=True)
strip_whitespace=True,
)

chunks = text_splitter.split_documents(document)

Expand All @@ -88,29 +92,35 @@ def populate_db_with_google_search(database: FAISS, query: WebQuery):
chunks.remove(chunk)
continue

chunk.page_content = remove(chunk.page_content, ['\n', '`'])
chunk.page_content = (query.db_embedding_prefix +
chunk.page_content +
query.db_embedding_postfix)
chunk.page_content = remove(chunk.page_content, ["\n", "`"])
chunk.page_content = (
query.db_embedding_prefix
+ chunk.page_content
+ query.db_embedding_postfix
)

if len(chunks) != 0:
database.add_documents(documents=chunks, embeddings=embeddings)

db_name = embedding_model_safe_name + query.db_save_file_extension
database.save_local(folder_path='store/vector', index_name=db_name)
database.save_local(folder_path="store/vector", index_name=db_name)

print(f"{Fore.CYAN}Document vectorization completed.{Fore.RESET}")


def web_query_google_lookup(query: WebQuery, token_limit: int = embed_config.model_token_limit):
def web_query_google_lookup(
query: WebQuery, token_limit: int = embed_config.model_token_limit
):
db_name = embedding_model_safe_name + query.db_save_file_extension
db = get_db_by_name(db_name, embeddings)

populate_db_with_google_search(db, query)

# return the document with the highest prompt similarity score (for now only browsing the first search result)
embedding_vector = embeddings.embed_query(query.db_embed_query)
docs_and_scores = db.similarity_search_by_vector(embedding_vector, k=round(token_limit / 64))
docs_and_scores = db.similarity_search_by_vector(
embedding_vector, k=round(token_limit / 64)
)

print(f"{Fore.CYAN}Database search completed.{Fore.RESET}")

Expand Down
12 changes: 6 additions & 6 deletions terminal_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ def get_input():

parser = argparse.ArgumentParser()
parser.add_argument(
'-H',
'--use-hugging-face',
dest='use_hugging_face',
"-H",
"--use-hugging-face",
dest="use_hugging_face",
action="store_true",
help='Use Hugging Face as the model provider'
help="Use Hugging Face as the model provider",
)
USE_HUGGING_FACE = parser.parse_args().use_hugging_face

'''
"""
parser.add_argument(
'-O',
'--use-ollama',
Expand All @@ -126,4 +126,4 @@ def get_input():
help='Use Ollama as the model provider'
)
USE_OLLAMA = parser.parse_args().use_ollama
'''
"""

0 comments on commit 588de15

Please sign in to comment.