Merge pull request #15 from SasCezar/dev

Fix and detach the dependency on the UI as a git submodule
SasCezar · Oct 3, 2024 · 9c8b538 · 9c8b538
2 parents 8a1c4ab + 1014403
commit 9c8b538
Show file tree

Hide file tree

Showing 16 changed files with 48 additions and 48 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "autofl-ui"]
-	path = autofl-ui
-	url = git@github.com:SasCezar/autofl-ui.git

diff --git a/autofl-ui b/autofl-ui
diff --git a/config/main.yaml b/config/main.yaml
@@ -3,7 +3,7 @@ defaults:
   - _self_
   - local: docker
   - taxonomy: gitranking
-  - annotator: simple
+  - annotator: default
   - version_strategy: latest
   - dataloader: postgres
   - writer: postgres

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -1,4 +1,3 @@
-version: "3.8"
 services:
   api:
     build:
@@ -39,16 +38,10 @@ services:
       - "8080:8080"
 
   ui:
-    build:
-      context: autofl-ui/
-      dockerfile: ./docker/Dockerfile
+    image: cezarsas/autofl-ui:latest
     ports:
       - "8501:8501"
     stdin_open: true # docker run -i
     tty: true        # docker run -t
-    volumes:
-      - type: bind
-        source: ./autofl-ui/
-        target: /autofl-ui
     links:
       - 'api:auto-fl'
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,13 +1,25 @@
-FROM python:3.10 as python-base
-RUN mkdir autofl
-WORKDIR  /autofl
-COPY pyproject.toml /autofl
-RUN pip3 install poetry
-RUN poetry config virtualenvs.create false
-RUN poetry install -n --no-ansi  --without dev
+# Start with the official Python 3.10 image as base
+FROM python:3.10 AS python-base
+
+# Set working directory and copy pyproject.toml
+WORKDIR /autofl
+COPY pyproject.toml .
+
+# Install poetry in a single layer and configure it
+RUN pip install --no-cache-dir poetry \
+    && poetry config virtualenvs.create false \
+    && poetry install --no-root --no-ansi --without dev
+
+# Copy the remaining application code
 COPY . .
+
+# Set environment variables
 ENV PYTHONPATH=/autofl/src
+ENV HYDRA_FULL_ERROR=1
 
-HEALTHCHECK CMD curl --fail http://localhost:8000
+# Define a health check command
+HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
+    CMD curl --fail http://localhost:8000 || exit 1
 
-CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "api.main:app", "--bind", "0.0.0.0:8000",  "--timeout", "100000"]
+# Set the default command to start the app
+CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "api.main:app", "--bind", "0.0.0.0:8000", "--timeout", "100000"]
diff --git a/mkdocs_requirements.txt b/mkdocs_requirements.txt
@@ -1,7 +1,8 @@
-mkdocs==1.5.3
-mkdocstrings==0.24.0
+mkdocs==1.6.1
+mkdocstrings==0.26.1
 mkdocs-gen-files==0.5.0
-mkdocstrings-python==1.7.5
+mkdocstrings-python==1.11.1
 mkdocs-literate-nav==0.6.1
-mkdocs-section-index==0.3.8
-mkdocs-material==9.4.14
+mkdocs-section-index==0.3.9
+mkdocs-material==9.5.39
+griffe==1.3.2
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,7 +25,7 @@ more-itertools = "^10.2.0"
 tree-sitter = "^0.20.4"
 sqlalchemy = "^2.0.25"
 psycopg = {extras = ["binary"], version = "^3.1.17"}
-gensim = "^4.3.2"
+gensim = "^4.3.3"
 fasttext-wheel = "^0.9.2"
 transformers = "^4.37.1"
 sentence-transformers = "^2.2.2"

diff --git a/src/annotation/similarity.py b/src/annotation/similarity.py
@@ -4,8 +4,8 @@
 from loguru import logger
 from sklearn.metrics.pairwise import cosine_similarity
 
-from embedding.embedding import AbstractEmbeddingModel
-from entity.taxonomy import TaxonomyBase
+from embedding import AbstractEmbeddingModel
+from entity import TaxonomyBase
 from annotation import LFBase
 
 

diff --git a/src/embedding/__init__.py b/src/embedding/__init__.py
@@ -0,0 +1 @@
+from .abstract import AbstractEmbeddingModel
diff --git a/src/embedding/embedding.py → src/embedding/abstract.py b/src/embedding/embedding.py → src/embedding/abstract.py
diff --git a/src/embedding/ft.py b/src/embedding/ft.py
@@ -1,7 +1,7 @@
 import fasttext as ft
-import numpy
+import numpy as np
 
-from embedding.embedding import AbstractEmbeddingModel
+from embedding.abstract import AbstractEmbeddingModel
 
 
 class FastTextEmbedding(AbstractEmbeddingModel):
@@ -14,7 +14,7 @@ def __init__(self, path: str, model: str = 'fastText', split_camel: bool = False
         self._name = f'{model}'
         self.model = ft.load_model(path)
 
-    def get_embedding(self, text: str) -> numpy.ndarray:
+    def get_embedding(self, text: str) -> np.ndarray:
         """
         Returns the embedding of the text.
         :param text:

diff --git a/src/embedding/gensim_w2v.py b/src/embedding/gensim_w2v.py
@@ -1,9 +1,7 @@
-import re
-
-import numpy
 import numpy as np
 from gensim.models import KeyedVectors
-from embedding.embedding import AbstractEmbeddingModel
+
+from embedding import AbstractEmbeddingModel
 
 
 class W2VEmbedding(AbstractEmbeddingModel):
@@ -16,7 +14,7 @@ def __init__(self, path: str, model: str = 'W2V-Unk', split_camel: bool = False)
         self._name = f'{model}'
         self.model = KeyedVectors.load_word2vec_format(path, binary=True)
 
-    def get_embedding(self, text: str) -> numpy.ndarray:
+    def get_embedding(self, text: str) -> np.ndarray:
         """
         Returns the embedding of the text.
         :param text:
@@ -30,4 +28,4 @@ def get_embedding(self, text: str) -> numpy.ndarray:
                 embeddings.append(self.model[word])
             else:
                 embeddings.append(np.zeros(self.model.vector_size))
-        return numpy.mean(embeddings, axis=0)
+        return np.mean(embeddings, axis=0)
diff --git a/src/embedding/huggingface.py b/src/embedding/huggingface.py
@@ -1,7 +1,7 @@
-import numpy
+import numpy as np
 import torch
 
-from embedding.embedding import AbstractEmbeddingModel
+from embedding.abstract import AbstractEmbeddingModel
 from sentence_transformers import SentenceTransformer
 from transformers import BertModel, BertTokenizer
 
@@ -18,7 +18,7 @@ def __init__(self, name, model, split_camel: bool = False):
         self.model = BertModel.from_pretrained(model)
         self.tokenizer = BertTokenizer.from_pretrained(model, do_lower_case=do_lower_case)
 
-    def get_embedding(self, text: str) -> numpy.ndarray:
+    def get_embedding(self, text: str) -> np.ndarray:
         """
         Returns the embedding of the text.
         :param text:
@@ -39,7 +39,7 @@ def __init__(self, name, model, device='cpu', split_camel: bool = False):
         self.model = SentenceTransformer(model, device=device)
         self.model.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
 
-    def get_embedding(self, text: str) -> numpy.ndarray:
+    def get_embedding(self, text: str) -> np.ndarray:
         """
         Returns the embedding of the text.
         :param text:

diff --git a/src/embedding/spacy_bert.py b/src/embedding/spacy_bert.py
@@ -1,6 +1,6 @@
-import numpy
+import numpy as np
 
-from embedding.embedding import AbstractEmbeddingModel
+from embedding.abstract import AbstractEmbeddingModel
 
 
 class BERTEmbedding(AbstractEmbeddingModel):
@@ -13,7 +13,7 @@ def __init__(self, model, split_camel: bool = False):
         self._name = f'{model}'
         self.model = spacy.load(model, disable=["ner", "textcat", "parser"])
 
-    def get_embedding(self, text: str) -> numpy.ndarray:
+    def get_embedding(self, text: str) -> np.ndarray:
         """
         Returns the embedding of the text.
         :param text:

diff --git a/src/entity/__init__.py b/src/entity/__init__.py
@@ -0,0 +1 @@
+from .taxonomy import TaxonomyBase
diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py
@@ -1,5 +1,4 @@
 from abc import ABC
-from pathlib import Path
 from typing import List, Tuple
 
 from loguru import logger
@@ -16,7 +15,6 @@ class PipelineBase(ABC):
     def run(self, project: Project, version: Version) -> Tuple[Project, Version]:
         pass
 
-
 # TODO: Move to BATCH, and use the Execution pipeline
 class BatchPipeline:
     def __init__(self, pipeline: ExecutionBase,