Skip to content

Commit

Permalink
added steps to black.yml
Browse files Browse the repository at this point in the history
reformatted all files
  • Loading branch information
latekvo committed Apr 15, 2024
1 parent a6eceeb commit 588de15
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 26 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ jobs:
uses: cytopia/docker-black@0.8
with:
path: 'core/'
- name: Python Black (main)
uses: cytopia/docker-black@0.8
with:
path: 'main.py'
- name: Python Black (crawler)
uses: cytopia/docker-black@0.8
with:
path: 'crawler.py'
4 changes: 3 additions & 1 deletion core/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
web_news_lookup,
web_docs_lookup_prompt,
web_news_lookup_prompt,
web_wiki_lookup_prompt)
web_wiki_lookup_prompt,
)
from core.tools.dbops import get_db_by_name
from core.tools.model_loader import load_model

Expand Down Expand Up @@ -56,6 +57,7 @@ def interpret_prompt_mode():
return web_wiki_lookup_prompt()
else:
return web_docs_lookup_prompt()

web_interpret_prompt_mode = interpret_prompt_mode()
# NOTE: a detour has been performed here, more details:
# web_chain_function will soon become just a tool playing a part of a larger mechanism.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@dataclass
class EmbedderConfiguration:
supplier: Literal['ollama', 'hugging_face']
supplier: Literal["ollama", "hugging_face"]
model_name: str
model_token_limit: int
article_limit: int
Expand Down
2 changes: 1 addition & 1 deletion core/models/configuration_objects/llm_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@dataclass
class LlmConfiguration:
supplier: Literal['ollama', 'hugging_face']
supplier: Literal["ollama", "hugging_face"]
model_name: str
model_token_limit: int
model_file: Optional[str] = None
8 changes: 4 additions & 4 deletions core/models/configurations.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
from core.models.configuration_objects.llm_configuration import LlmConfiguration
from core.models.configuration_objects.embedder_configuration import EmbedderConfiguration
from core.models.configuration_objects.embedder_configuration import (
EmbedderConfiguration,
)
from terminal_gui import USE_HUGGING_FACE

llm_ollama_heavy = LlmConfiguration(
supplier="ollama",
model_name="zephyr:7b-beta-q5_K_M",
model_token_limit=4096,
model_file=""
model_file="",
)

embedder_ollama_heavy = EmbedderConfiguration(
supplier="ollama",
model_name="nomic-embed-text",
model_token_limit=4096,

    # chunk splitter options
article_limit=10,
buffer_stops=["\n\n\n", "\n\n", "\n", ". ", ", ", " ", ""],
Expand All @@ -32,7 +33,6 @@
model_name="nomic-embed-text-v1.5.Q6_K.gguf",
model_file="nomic-ai/nomic-embed-text-v1.5-GGUF",
model_token_limit=4096,

    # chunk splitter options
article_limit=10,
buffer_stops=["\n\n\n", "\n\n", "\n", ". ", ", ", " ", ""],
Expand Down
36 changes: 23 additions & 13 deletions core/tools/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def docs_to_context(docs_and_scores: List[Document], token_limit: int) -> str:
if document_index >= len(docs_and_scores):
break

print(f"{Fore.CYAN}Used {document_index + 1} snippets with a total of {token_count} tokens as context.{Fore.RESET}")
print(
f"{Fore.CYAN}Used {document_index + 1} snippets with a total of {token_count} tokens as context.{Fore.RESET}"
)
print(f"{Fore.CYAN}Context itself: {Fore.RESET}", context_text)
return context_text

Expand All @@ -49,11 +51,12 @@ def query_for_urls(query: WebQuery, url_amount=embed_config.article_limit) -> Li

url_list = search(
query=query.web_query,
stop= url_amount,
lang='en',
safe='off',
stop=url_amount,
lang="en",
safe="off",
tbs=query.web_tbs,
extra_params=query.web_extra_params)
extra_params=query.web_extra_params,
)
print(f"{Fore.CYAN}Web search completed.{Fore.RESET}")
return url_list

Expand All @@ -79,7 +82,8 @@ def populate_db_with_google_search(database: FAISS, query: WebQuery):
chunk_size=query.db_chunk_size,
chunk_overlap=embed_config.chunk_overlap,
keep_separator=False,
strip_whitespace=True)
strip_whitespace=True,
)

chunks = text_splitter.split_documents(document)

Expand All @@ -88,29 +92,35 @@ def populate_db_with_google_search(database: FAISS, query: WebQuery):
chunks.remove(chunk)
continue

chunk.page_content = remove(chunk.page_content, ['\n', '`'])
chunk.page_content = (query.db_embedding_prefix +
chunk.page_content +
query.db_embedding_postfix)
chunk.page_content = remove(chunk.page_content, ["\n", "`"])
chunk.page_content = (
query.db_embedding_prefix
+ chunk.page_content
+ query.db_embedding_postfix
)

if len(chunks) != 0:
database.add_documents(documents=chunks, embeddings=embeddings)

db_name = embedding_model_safe_name + query.db_save_file_extension
database.save_local(folder_path='store/vector', index_name=db_name)
database.save_local(folder_path="store/vector", index_name=db_name)

print(f"{Fore.CYAN}Document vectorization completed.{Fore.RESET}")


def web_query_google_lookup(query: WebQuery, token_limit: int = embed_config.model_token_limit):
def web_query_google_lookup(
query: WebQuery, token_limit: int = embed_config.model_token_limit
):
db_name = embedding_model_safe_name + query.db_save_file_extension
db = get_db_by_name(db_name, embeddings)

populate_db_with_google_search(db, query)

# return the document with the highest prompt similarity score (for now only browsing the first search result)
embedding_vector = embeddings.embed_query(query.db_embed_query)
docs_and_scores = db.similarity_search_by_vector(embedding_vector, k=round(token_limit / 64))
docs_and_scores = db.similarity_search_by_vector(
embedding_vector, k=round(token_limit / 64)
)

print(f"{Fore.CYAN}Database search completed.{Fore.RESET}")

Expand Down
12 changes: 6 additions & 6 deletions terminal_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ def get_input():

parser = argparse.ArgumentParser()
parser.add_argument(
'-H',
'--use-hugging-face',
dest='use_hugging_face',
"-H",
"--use-hugging-face",
dest="use_hugging_face",
action="store_true",
help='Use Hugging Face as the model provider'
help="Use Hugging Face as the model provider",
)
USE_HUGGING_FACE = parser.parse_args().use_hugging_face

'''
"""
parser.add_argument(
'-O',
'--use-ollama',
Expand All @@ -126,4 +126,4 @@ def get_input():
help='Use Ollama as the model provider'
)
USE_OLLAMA = parser.parse_args().use_ollama
'''
"""

0 comments on commit 588de15

Please sign in to comment.