From 4db2a0ce05428e09dd4feb59bfa73d48366af31e Mon Sep 17 00:00:00 2001 From: Cezar Sas Date: Thu, 3 Oct 2024 11:16:34 +0200 Subject: [PATCH] Fix and detached the dependency on the ui as git module --- .gitmodules | 3 --- autofl-ui | 1 - config/main.yaml | 2 +- docker-compose.yaml | 9 +------ docker/Dockerfile | 30 ++++++++++++++------- pyproject.toml | 2 +- src/annotation/similarity.py | 4 +-- src/embedding/__init__.py | 1 + src/embedding/{embedding.py => abstract.py} | 0 src/embedding/ft.py | 6 ++--- src/embedding/gensim_w2v.py | 10 +++---- src/embedding/huggingface.py | 8 +++--- src/embedding/spacy_bert.py | 6 ++--- src/entity/__init__.py | 1 + src/pipeline/pipeline.py | 2 -- 15 files changed, 42 insertions(+), 43 deletions(-) delete mode 160000 autofl-ui rename src/embedding/{embedding.py => abstract.py} (100%) diff --git a/.gitmodules b/.gitmodules index 6a10f54..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "autofl-ui"] - path = autofl-ui - url = git@github.com:SasCezar/autofl-ui.git diff --git a/autofl-ui b/autofl-ui deleted file mode 160000 index b697f23..0000000 --- a/autofl-ui +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b697f23cd1aed04cc1ac7c24aded46a9f4fb6155 diff --git a/config/main.yaml b/config/main.yaml index d540573..9103a43 100644 --- a/config/main.yaml +++ b/config/main.yaml @@ -3,7 +3,7 @@ defaults: - _self_ - local: docker - taxonomy: gitranking - - annotator: simple + - annotator: default - version_strategy: latest - dataloader: postgres - writer: postgres diff --git a/docker-compose.yaml b/docker-compose.yaml index 96925ac..09cb482 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,4 +1,3 @@ -version: "3.8" services: api: build: @@ -39,16 +38,10 @@ services: - "8080:8080" ui: - build: - context: autofl-ui/ - dockerfile: ./docker/Dockerfile + image: cezarsas/autofl-ui:latest ports: - "8501:8501" stdin_open: true # docker run -i tty: true # docker run -t - volumes: - - type: bind - source: ./autofl-ui/ - target: /autofl-ui links: - 'api:auto-fl' \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index f9e8d90..1b81f1f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,13 +1,25 @@ -FROM python:3.10 as python-base -RUN mkdir autofl -WORKDIR /autofl -COPY pyproject.toml /autofl -RUN pip3 install poetry -RUN poetry config virtualenvs.create false -RUN poetry install -n --no-ansi --without dev +# Start with the official Python 3.10 image as base +FROM python:3.10 AS python-base + +# Set working directory and copy pyproject.toml +WORKDIR /autofl +COPY pyproject.toml . + +# Install poetry in a single layer and configure it +RUN pip install --no-cache-dir poetry \ + && poetry config virtualenvs.create false \ + && poetry install --no-root --no-ansi --without dev + +# Copy the remaining application code COPY . . + +# Set environment variables ENV PYTHONPATH=/autofl/src +ENV HYDRA_FULL_ERROR=1 -HEALTHCHECK CMD curl --fail http://localhost:8000 +# Define a health check command +HEALTHCHECK --interval=30s --timeout=10s --retries=3 \ + CMD curl --fail http://localhost:8000 || exit 1 -CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "api.main:app", "--bind", "0.0.0.0:8000", "--timeout", "100000"] \ No newline at end of file +# Set the default command to start the app +CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "api.main:app", "--bind", "0.0.0.0:8000", "--timeout", "100000"] diff --git a/pyproject.toml b/pyproject.toml index c5c709c..a3ce6bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ more-itertools = "^10.2.0" tree-sitter = "^0.20.4" sqlalchemy = "^2.0.25" psycopg = {extras = ["binary"], version = "^3.1.17"} -gensim = "^4.3.2" +gensim = "^4.3.3" fasttext-wheel = "^0.9.2" transformers = "^4.37.1" sentence-transformers = "^2.2.2" diff --git a/src/annotation/similarity.py b/src/annotation/similarity.py index 8c49d5c..d39f64d 100644 --- a/src/annotation/similarity.py +++ b/src/annotation/similarity.py @@ -4,8 +4,8 @@ from loguru import logger from sklearn.metrics.pairwise import cosine_similarity -from embedding.embedding import AbstractEmbeddingModel -from entity.taxonomy import TaxonomyBase +from embedding import AbstractEmbeddingModel +from entity import TaxonomyBase from annotation import LFBase diff --git a/src/embedding/__init__.py b/src/embedding/__init__.py index e69de29..67c4501 100644 --- a/src/embedding/__init__.py +++ b/src/embedding/__init__.py @@ -0,0 +1 @@ +from .abstract import AbstractEmbeddingModel \ No newline at end of file diff --git a/src/embedding/embedding.py b/src/embedding/abstract.py similarity index 100% rename from src/embedding/embedding.py rename to src/embedding/abstract.py diff --git a/src/embedding/ft.py b/src/embedding/ft.py index 0c8f622..819e340 100644 --- a/src/embedding/ft.py +++ b/src/embedding/ft.py @@ -1,7 +1,7 @@ import fasttext as ft -import numpy +import numpy as np -from embedding.embedding import AbstractEmbeddingModel +from embedding.abstract import AbstractEmbeddingModel class FastTextEmbedding(AbstractEmbeddingModel): @@ -14,7 +14,7 @@ def __init__(self, path: str, model: str = 'fastText', split_camel: bool = False self._name = f'{model}' self.model = ft.load_model(path) - def get_embedding(self, text: str) -> numpy.ndarray: + def get_embedding(self, text: str) -> np.ndarray: """ Returns the embedding of the text. :param text: diff --git a/src/embedding/gensim_w2v.py b/src/embedding/gensim_w2v.py index 6da1f53..2447009 100644 --- a/src/embedding/gensim_w2v.py +++ b/src/embedding/gensim_w2v.py @@ -1,9 +1,7 @@ -import re - -import numpy import numpy as np from gensim.models import KeyedVectors -from embedding.embedding import AbstractEmbeddingModel + +from embedding import AbstractEmbeddingModel class W2VEmbedding(AbstractEmbeddingModel): @@ -16,7 +14,7 @@ def __init__(self, path: str, model: str = 'W2V-Unk', split_camel: bool = False) self._name = f'{model}' self.model = KeyedVectors.load_word2vec_format(path, binary=True) - def get_embedding(self, text: str) -> numpy.ndarray: + def get_embedding(self, text: str) -> np.ndarray: """ Returns the embedding of the text. :param text: @@ -30,4 +28,4 @@ def get_embedding(self, text: str) -> numpy.ndarray: embeddings.append(self.model[word]) else: embeddings.append(np.zeros(self.model.vector_size)) - return numpy.mean(embeddings, axis=0) + return np.mean(embeddings, axis=0) diff --git a/src/embedding/huggingface.py b/src/embedding/huggingface.py index 9a85f0f..755a104 100644 --- a/src/embedding/huggingface.py +++ b/src/embedding/huggingface.py @@ -1,7 +1,7 @@ -import numpy +import numpy as np import torch -from embedding.embedding import AbstractEmbeddingModel +from embedding.abstract import AbstractEmbeddingModel from sentence_transformers import SentenceTransformer from transformers import BertModel, BertTokenizer @@ -18,7 +18,7 @@ def __init__(self, name, model, split_camel: bool = False): self.model = BertModel.from_pretrained(model) self.tokenizer = BertTokenizer.from_pretrained(model, do_lower_case=do_lower_case) - def get_embedding(self, text: str) -> numpy.ndarray: + def get_embedding(self, text: str) -> np.ndarray: """ Returns the embedding of the text. :param text: @@ -39,7 +39,7 @@ def __init__(self, name, model, device='cpu', split_camel: bool = False): self.model = SentenceTransformer(model, device=device) self.model.tokenizer.add_special_tokens({'pad_token': '[PAD]'}) - def get_embedding(self, text: str) -> numpy.ndarray: + def get_embedding(self, text: str) -> np.ndarray: """ Returns the embedding of the text. :param text: diff --git a/src/embedding/spacy_bert.py b/src/embedding/spacy_bert.py index b07bf3b..01fd402 100644 --- a/src/embedding/spacy_bert.py +++ b/src/embedding/spacy_bert.py @@ -1,6 +1,6 @@ -import numpy +import numpy as np -from embedding.embedding import AbstractEmbeddingModel +from embedding.abstract import AbstractEmbeddingModel class BERTEmbedding(AbstractEmbeddingModel): @@ -13,7 +13,7 @@ def __init__(self, model, split_camel: bool = False): self._name = f'{model}' self.model = spacy.load(model, disable=["ner", "textcat", "parser"]) - def get_embedding(self, text: str) -> numpy.ndarray: + def get_embedding(self, text: str) -> np.ndarray: """ Returns the embedding of the text. :param text: diff --git a/src/entity/__init__.py b/src/entity/__init__.py index e69de29..e1f837f 100644 --- a/src/entity/__init__.py +++ b/src/entity/__init__.py @@ -0,0 +1 @@ +from .taxonomy import TaxonomyBase \ No newline at end of file diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py index ad102d3..be68532 100644 --- a/src/pipeline/pipeline.py +++ b/src/pipeline/pipeline.py @@ -1,5 +1,4 @@ from abc import ABC -from pathlib import Path from typing import List, Tuple from loguru import logger @@ -16,7 +15,6 @@ class PipelineBase(ABC): def run(self, project: Project, version: Version) -> Tuple[Project, Version]: pass - # TODO: Move to BATCH, and use the Execution pipeline class BatchPipeline: def __init__(self, pipeline: ExecutionBase,