From 66cd23064281e376d75d5a4b8ada93135d16dcf3 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 18:40:17 +0100 Subject: [PATCH 01/17] deleting tox.ini --- tox.ini | 3 --- 1 file changed, 3 deletions(-) delete mode 100755 tox.ini diff --git a/tox.ini b/tox.ini deleted file mode 100755 index c32fbd8..0000000 --- a/tox.ini +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 79 -max-complexity = 10 From 1d86b7a86f35aa0963382dab91e2d0fe2d8fd095 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 19:02:55 +0100 Subject: [PATCH 02/17] Adding test CI --- .github/workflows/test.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/test.yaml diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..3447dcd --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,27 @@ +name: Run tests CI +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Use python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run tests + run: pytest tests/ \ No newline at end of file From 105141d482344348f8fe860ef698089cb215b363 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 19:12:05 +0100 Subject: [PATCH 03/17] Update readme --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index b3e0c56..0e9a80b 100755 --- a/README.md +++ b/README.md @@ -39,6 +39,19 @@ dvc pull -r origin pytest tests/ ``` +## Deployment with Docker + +1. Create the docker image + +```sh +docker build -t Sentibites:1.0 . +``` + +2. Run the container +```sh +docker run -p 5000:8000 Sentibites +``` + ## Usage For training : From 74c8ebab2f1b2433ca2b920ddd99ad2fb708d6fb Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 19:14:10 +0100 Subject: [PATCH 04/17] testing ci --- .github/workflows/test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3447dcd..944959e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,6 +4,7 @@ on: branches: - main - dev + - ci/cd pull_request: branches: - main @@ -19,7 +20,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.9' - + - name: Install dependencies run: pip install -r requirements.txt From e6d87c8259ceae31b3368f0e18f124ec9b2a0820 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:18:34 +0100 Subject: [PATCH 05/17] Fixing tests and request msg --- src/app/api.py | 24 +++++++++-- src/app/schemas.py | 4 ++ src/models/predict_model.py | 13 ++++-- tests/test_api.py | 81 +++++++++++++++++++++++-------------- 4 files changed, 85 insertions(+), 37 deletions(-) create mode 100644 src/app/schemas.py diff --git a/src/app/api.py b/src/app/api.py index 0cc3e3a..706b43c 100644 --- a/src/app/api.py +++ b/src/app/api.py @@ -5,9 +5,19 @@ from functools import wraps from http import HTTPStatus from typing import List +import os +import sys +from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, Request from src.models.predict_model import SentiBites +from src.app.schemas import Review + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) model = None @@ -50,6 +60,7 @@ def _load_model(): global model model = SentiBites("models/SentiBites/") + @app.get("/", tags=["General"]) # path operation decorator @construct_response @@ -65,19 +76,24 @@ def _index(request: Request): @app.post("/models/", tags=["Prediction"]) @construct_response -def _predict(request: Request, payload: str): +def _predict(request: Request, payload: Review): """Performs sentiment analysis based on the food review.""" - + if model: - prediction = model.predict(payload) + prediction,scores = model.predict(payload.msg) response = { "message": HTTPStatus.OK.phrase, "status-code": HTTPStatus.OK, "data": { "model-type": "RoBERTaSB", - "payload": payload, + "payload": payload.msg, "prediction": prediction, + "Scores" : { + "positive" : scores['positive'], + "neutral" : scores['neutral'], + "negative" : scores['negative'] + } }, } else: diff --git a/src/app/schemas.py b/src/app/schemas.py new file mode 100644 index 0000000..19693f8 --- /dev/null +++ b/src/app/schemas.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel + +class Review(BaseModel): + msg : str \ No newline at end of file diff --git a/src/models/predict_model.py b/src/models/predict_model.py index 91c70e5..8a91f64 100755 --- a/src/models/predict_model.py +++ b/src/models/predict_model.py @@ -28,11 +28,18 @@ def predict(self, text): scores = output[0][0].detach().numpy() scores = softmax(scores) - # Printing the prediction + # Selecting the best score ranking = np.argsort(scores) ranking = ranking[::-1] - - return self.config.id2label[ranking[0]] + + # Stroring the scores + res = {} + for i in range(scores.shape[0]): + length = self.config.id2label[ranking[i]] + score = scores[ranking[i]] + res[length] = float(score) + + return self.config.id2label[ranking[0]],res def preprocess(text): """remove links and mentions in a sentence""" diff --git a/tests/test_api.py b/tests/test_api.py index 1302db1..93eb43d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,41 +1,62 @@ from fastapi.testclient import TestClient +import os +import sys + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) + from src.app.api import app from http import HTTPStatus -client = TestClient(app) - def test_read_main(): - response = client.get("/") - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["data"]["message"] == "Welcome to SentiBites! Please, read the `/docs`!" + with TestClient(app) as client: + response = client.get("/") + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["data"]["message"] == "Welcome to SentiBites! Please, read the `/docs`!" def test_read_prediction(): - response = client.post("/models/", params={"payload": "This is a test."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "This is a test." + with TestClient(app) as client: + response = client.post("/models", json = {'msg':"This is a test."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "This is a test." def test_positive_prediction(): - response = client.post("/models/", params={"payload": "This food is really good."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "This food is really good." - assert response_body["data"]["prediction"] == "positive" + with TestClient(app) as client: + response = client.post("/models/", json={"msg": "This food is really good."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "This food is really good." + assert response_body["data"]["prediction"] == "positive" def test_negative_prediction(): - response = client.post("/models/", params={"payload": "Never buying this again."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "Never buying this again." - assert response_body["data"]["prediction"] == "negative" \ No newline at end of file + with TestClient(app) as client: + response = client.post("/models/", json={"msg": "Never buying this again."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "Never buying this again." + assert response_body["data"]["prediction"] == "negative" + +def test_bad_url(): + with TestClient(app) as client: + response = client.post("/mode", json={"msg": "Never buying this again."}) + assert response.status_code == 404 + +def test_bad_request(): + with TestClient(app) as client: + response = client.post("/models/", json={"false": "Never buying this again."}) + assert response.status_code == 422 \ No newline at end of file From 9a041824c5e66c951a704f817da158d58985a104 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:24:02 +0100 Subject: [PATCH 06/17] fixing tests --- tests/test_train.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_train.py b/tests/test_train.py index edf0cbf..55c4175 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -1,4 +1,13 @@ import pytest +import os +import sys + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) + from src.models.predict_model import predict,SentiBites from src.models.train_model import pre_processing, train import datasets From 4122e0027cf3c362cf08df431f210eabee619244 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:36:45 +0100 Subject: [PATCH 07/17] up --- .github/workflows/test.yaml | 5 ++++- tests/test_train.py | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 944959e..ba03925 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use python uses: actions/setup-python@v4 @@ -24,5 +24,8 @@ jobs: - name: Install dependencies run: pip install -r requirements.txt + - name: show cwd + run : python -c "import sys; print('\n'.join(sys.path))" + - name: Run tests run: pytest tests/ \ No newline at end of file diff --git a/tests/test_train.py b/tests/test_train.py index 55c4175..ef0a8fd 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -1,4 +1,3 @@ -import pytest import os import sys @@ -11,7 +10,6 @@ from src.models.predict_model import predict,SentiBites from src.models.train_model import pre_processing, train import datasets -import pandas as pd MODEL = 'models/SentiBites' From 62d93c830965fe4e9269181df980390ac2c25605 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:41:36 +0100 Subject: [PATCH 08/17] modif workflow --- .github/workflows/test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ba03925..da3f6ce 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Use python uses: actions/setup-python@v4 @@ -26,6 +26,6 @@ jobs: - name: show cwd run : python -c "import sys; print('\n'.join(sys.path))" - + - name: Run tests - run: pytest tests/ \ No newline at end of file + run: python -m pytest \ No newline at end of file From 09f1da2491f09f0fc33172ec1aa23d4f4b2e6a72 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:47:14 +0100 Subject: [PATCH 09/17] test --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index da3f6ce..d7d7b7d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -25,7 +25,7 @@ jobs: run: pip install -r requirements.txt - name: show cwd - run : python -c "import sys; print('\n'.join(sys.path))" + run : python -c "import os; print('\n',os.getcwd())" - name: Run tests run: python -m pytest \ No newline at end of file From 34d51967f7522a3c7a317945a58aa420ae76bd9e Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 21:48:33 +0100 Subject: [PATCH 10/17] adding dvc to tests --- .github/workflows/test.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d7d7b7d..dc0e4fc 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,5 +27,8 @@ jobs: - name: show cwd run : python -c "import os; print('\n',os.getcwd())" + - name : pull data from dvc + run : python -m dvc pull -r origin + - name: Run tests run: python -m pytest \ No newline at end of file From 360ac1bc00cfd6e206581c28aa78a6a467895a1c Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 22:01:54 +0100 Subject: [PATCH 11/17] update requieremnets and app --- requirements.txt | 1 + src/app/app.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8e7bf17..4c5141e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ transformers mlflow botocore tensorflow +fsspec==2023.9.2 datasets==2.11 accelerate>=0.20.1 evaluate diff --git a/src/app/app.py b/src/app/app.py index 13752e9..2f26aad 100644 --- a/src/app/app.py +++ b/src/app/app.py @@ -13,10 +13,10 @@ def index(): text = request.form.get("text") # Build the URL with the text as a parameter - url = f"{api_url}?payload={text.replace(' ', '%20')}" + payload = {'msg':text} # Call your FastAPI API - response = requests.post(url) + response = requests.post(api_url,data=payload) if response.status_code == 200: # Get the results from the API From 9c435c7fb44ce03f9eeb4999bdd01b904a47994a Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 22:16:26 +0100 Subject: [PATCH 12/17] correcting request format --- src/app/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/app.py b/src/app/app.py index 2f26aad..c92e895 100644 --- a/src/app/app.py +++ b/src/app/app.py @@ -16,7 +16,7 @@ def index(): payload = {'msg':text} # Call your FastAPI API - response = requests.post(api_url,data=payload) + response = requests.post(api_url,json=payload) if response.status_code == 200: # Get the results from the API From 9fcc6343638445e0a447674802dee6ed1352902b Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 22:48:37 +0100 Subject: [PATCH 13/17] Adding neutral predictions --- src/app/templates/index.html | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/app/templates/index.html b/src/app/templates/index.html index c2b05e3..3e2b848 100644 --- a/src/app/templates/index.html +++ b/src/app/templates/index.html @@ -43,6 +43,10 @@ color: red; font-size: 1.5em; /* Taille plus grande */ } + .prediction-neutre { + color: blue; + font-size: 1.5em; /* Taille plus grande */ + } @@ -81,7 +85,7 @@

Analysis Result:

Analyzed Text:

{{ text }}

-

+

Prediction: {{ prediction }}

From c7ea556c705cba647d85b60b29ff19c08bfda21c Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 23:07:30 +0100 Subject: [PATCH 14/17] Update Readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0e9a80b..88195db 100755 --- a/README.md +++ b/README.md @@ -49,10 +49,10 @@ docker build -t Sentibites:1.0 . 2. Run the container ```sh -docker run -p 5000:8000 Sentibites +docker run -p 5000:5000 -p 8000:8000 Sentibites ``` -## Usage +## Usage without App For training : @@ -63,5 +63,5 @@ python3 src/models/train_model.py --model "roberta-base" --dataset data/processe For inference : ```sh -python3 src/models/predict_model.py --input "text" +python3 src/models/predict_model.py --model "models/SentiBites" --input "text" ``` \ No newline at end of file From 9bd71d64704b82c768f9c66ac2b923d65f708476 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 23:09:57 +0100 Subject: [PATCH 15/17] deleting useless files/folders --- data/external/.gitkeep | 0 data/interim/.gitkeep | 0 setup.py | 10 ---------- test_environment.py | 25 ------------------------- 4 files changed, 35 deletions(-) delete mode 100644 data/external/.gitkeep delete mode 100644 data/interim/.gitkeep delete mode 100755 setup.py delete mode 100755 test_environment.py diff --git a/data/external/.gitkeep b/data/external/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/data/interim/.gitkeep b/data/interim/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/setup.py b/setup.py deleted file mode 100755 index a8110ed..0000000 --- a/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name='src', - packages=find_packages(), - version='0.1.0', - description='Sentiment analysis on Amazon reviews for the MLOps course', - author='SentiBites', - license='MIT', -) diff --git a/test_environment.py b/test_environment.py deleted file mode 100755 index d0ac4a7..0000000 --- a/test_environment.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys - -REQUIRED_PYTHON = "python3" - - -def main(): - system_major = sys.version_info.major - if REQUIRED_PYTHON == "python": - required_major = 2 - elif REQUIRED_PYTHON == "python3": - required_major = 3 - else: - raise ValueError("Unrecognized python interpreter: {}".format( - REQUIRED_PYTHON)) - - if system_major != required_major: - raise TypeError( - "This project requires Python {}. Found: Python {}".format( - required_major, sys.version)) - else: - print(">>> Development environment passes all tests!") - - -if __name__ == '__main__': - main() From 95db94dbb18f1a5a4945798f75b1893db180f613 Mon Sep 17 00:00:00 2001 From: Rudio Date: Fri, 8 Dec 2023 23:21:26 +0100 Subject: [PATCH 16/17] FInishing Ci --- .github/workflows/{test.yaml => ci.yaml} | 1 - 1 file changed, 1 deletion(-) rename .github/workflows/{test.yaml => ci.yaml} (97%) diff --git a/.github/workflows/test.yaml b/.github/workflows/ci.yaml similarity index 97% rename from .github/workflows/test.yaml rename to .github/workflows/ci.yaml index dc0e4fc..890e8fb 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/ci.yaml @@ -4,7 +4,6 @@ on: branches: - main - dev - - ci/cd pull_request: branches: - main From efd1101269e8b21e68fc72e0d88761deadcf623b Mon Sep 17 00:00:00 2001 From: Rudio Date: Sat, 9 Dec 2023 09:35:39 +0100 Subject: [PATCH 17/17] up dockerfile --- Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fc99311..541c598 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,15 @@ FROM python:3.10 + ENV PYTHONUNBUFFERED 1 + COPY requirements.txt . + WORKDIR /app + COPY . /app/ -RUN pip install -r requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt + EXPOSE 5000 + CMD ["python3", "run_servers.py"]