diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..890e8fb --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,33 @@ +name: Run tests CI +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Use python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: show cwd + run : python -c "import os; print('\n',os.getcwd())" + + - name : pull data from dvc + run : python -m dvc pull -r origin + + - name: Run tests + run: python -m pytest \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index fc99311..541c598 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,15 @@ FROM python:3.10 + ENV PYTHONUNBUFFERED 1 + COPY requirements.txt . + WORKDIR /app + COPY . /app/ -RUN pip install -r requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt + EXPOSE 5000 + CMD ["python3", "run_servers.py"] diff --git a/README.md b/README.md index b3e0c56..88195db 100755 --- a/README.md +++ b/README.md @@ -39,7 +39,20 @@ dvc pull -r origin pytest tests/ ``` -## Usage +## Deployment with Docker + +1. Create the docker image + +```sh +docker build -t sentibites:1.0 . +``` + +2. 
Run the container +```sh +docker run -p 5000:5000 -p 8000:8000 sentibites:1.0 +``` + +## Usage without App For training : @@ -50,5 +63,5 @@ python3 src/models/train_model.py --model "roberta-base" --dataset data/processe For inference : ```sh -python3 src/models/predict_model.py --input "text" +python3 src/models/predict_model.py --model "models/SentiBites" --input "text" ``` \ No newline at end of file diff --git a/data/external/.gitkeep b/data/external/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/data/interim/.gitkeep b/data/interim/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index 8e7bf17..4c5141e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ transformers mlflow botocore tensorflow +fsspec==2023.9.2 datasets==2.11 accelerate>=0.20.1 evaluate diff --git a/setup.py b/setup.py deleted file mode 100755 index a8110ed..0000000 --- a/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name='src', - packages=find_packages(), - version='0.1.0', - description='Sentiment analysis on Amazon reviews for the MLOps course', - author='SentiBites', - license='MIT', -) diff --git a/src/app/api.py b/src/app/api.py index 0cc3e3a..706b43c 100644 --- a/src/app/api.py +++ b/src/app/api.py @@ -5,9 +5,19 @@ from functools import wraps from http import HTTPStatus from typing import List +import os +import sys +from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, Request from src.models.predict_model import SentiBites +from src.app.schemas import Review + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) model = None @@ -50,6 +60,7 @@ def _load_model(): global model model = SentiBites("models/SentiBites/") + @app.get("/", tags=["General"]) # path operation decorator @construct_response @@ -65,19 +76,24 
@@ def _index(request: Request): @app.post("/models/", tags=["Prediction"]) @construct_response -def _predict(request: Request, payload: str): +def _predict(request: Request, payload: Review): """Performs sentiment analysis based on the food review.""" - + if model: - prediction = model.predict(payload) + prediction,scores = model.predict(payload.msg) response = { "message": HTTPStatus.OK.phrase, "status-code": HTTPStatus.OK, "data": { "model-type": "RoBERTaSB", - "payload": payload, + "payload": payload.msg, "prediction": prediction, + "Scores" : { + "positive" : scores['positive'], + "neutral" : scores['neutral'], + "negative" : scores['negative'] + } }, } else: diff --git a/src/app/app.py b/src/app/app.py index 13752e9..c92e895 100644 --- a/src/app/app.py +++ b/src/app/app.py @@ -13,10 +13,10 @@ def index(): text = request.form.get("text") # Build the URL with the text as a parameter - url = f"{api_url}?payload={text.replace(' ', '%20')}" + payload = {'msg':text} # Call your FastAPI API - response = requests.post(url) + response = requests.post(api_url,json=payload) if response.status_code == 200: # Get the results from the API diff --git a/src/app/schemas.py b/src/app/schemas.py new file mode 100644 index 0000000..19693f8 --- /dev/null +++ b/src/app/schemas.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel + +class Review(BaseModel): + msg : str \ No newline at end of file diff --git a/src/app/templates/index.html b/src/app/templates/index.html index c2b05e3..3e2b848 100644 --- a/src/app/templates/index.html +++ b/src/app/templates/index.html @@ -43,6 +43,10 @@ color: red; font-size: 1.5em; /* Taille plus grande */ } + .prediction-neutre { + color: blue; + font-size: 1.5em; /* Taille plus grande */ + }
@@ -81,7 +85,7 @@Analyzed Text:
{{ text }}
-+
Prediction: {{ prediction }}
diff --git a/src/models/predict_model.py b/src/models/predict_model.py index 91c70e5..8a91f64 100755 --- a/src/models/predict_model.py +++ b/src/models/predict_model.py @@ -28,11 +28,18 @@ def predict(self, text): scores = output[0][0].detach().numpy() scores = softmax(scores) - # Printing the prediction + # Selecting the best score ranking = np.argsort(scores) ranking = ranking[::-1] - - return self.config.id2label[ranking[0]] + + # Storing the scores + res = {} + for i in range(scores.shape[0]): + length = self.config.id2label[ranking[i]] + score = scores[ranking[i]] + res[length] = float(score) + + return self.config.id2label[ranking[0]],res def preprocess(text): """remove links and mentions in a sentence""" diff --git a/test_environment.py b/test_environment.py deleted file mode 100755 index d0ac4a7..0000000 --- a/test_environment.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys - -REQUIRED_PYTHON = "python3" - - -def main(): - system_major = sys.version_info.major - if REQUIRED_PYTHON == "python": - required_major = 2 - elif REQUIRED_PYTHON == "python3": - required_major = 3 - else: - raise ValueError("Unrecognized python interpreter: {}".format( - REQUIRED_PYTHON)) - - if system_major != required_major: - raise TypeError( - "This project requires Python {}. 
Found: Python {}".format( - required_major, sys.version)) - else: - print(">>> Development environment passes all tests!") - - -if __name__ == '__main__': - main() diff --git a/tests/test_api.py b/tests/test_api.py index 1302db1..93eb43d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,41 +1,62 @@ from fastapi.testclient import TestClient +import os +import sys + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) + from src.app.api import app from http import HTTPStatus -client = TestClient(app) - def test_read_main(): - response = client.get("/") - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["data"]["message"] == "Welcome to SentiBites! Please, read the `/docs`!" + with TestClient(app) as client: + response = client.get("/") + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["data"]["message"] == "Welcome to SentiBites! Please, read the `/docs`!" def test_read_prediction(): - response = client.post("/models/", params={"payload": "This is a test."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "This is a test." 
+ with TestClient(app) as client: + response = client.post("/models", json = {'msg':"This is a test."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "This is a test." def test_positive_prediction(): - response = client.post("/models/", params={"payload": "This food is really good."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "This food is really good." - assert response_body["data"]["prediction"] == "positive" + with TestClient(app) as client: + response = client.post("/models/", json={"msg": "This food is really good."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "This food is really good." + assert response_body["data"]["prediction"] == "positive" def test_negative_prediction(): - response = client.post("/models/", params={"payload": "Never buying this again."}) - assert response.status_code == 200 - response_body = response.json() - assert response_body["message"] == HTTPStatus.OK.phrase - assert response_body["status-code"] == HTTPStatus.OK - assert response_body["data"]["model-type"] == "RoBERTaSB" - assert response_body["data"]["payload"] == "Never buying this again." 
- assert response_body["data"]["prediction"] == "negative" \ No newline at end of file + with TestClient(app) as client: + response = client.post("/models/", json={"msg": "Never buying this again."}) + assert response.status_code == 200 + response_body = response.json() + assert response_body["message"] == HTTPStatus.OK.phrase + assert response_body["status-code"] == HTTPStatus.OK + assert response_body["data"]["model-type"] == "RoBERTaSB" + assert response_body["data"]["payload"] == "Never buying this again." + assert response_body["data"]["prediction"] == "negative" + +def test_bad_url(): + with TestClient(app) as client: + response = client.post("/mode", json={"msg": "Never buying this again."}) + assert response.status_code == 404 + +def test_bad_request(): + with TestClient(app) as client: + response = client.post("/models/", json={"false": "Never buying this again."}) + assert response.status_code == 422 \ No newline at end of file diff --git a/tests/test_train.py b/tests/test_train.py index edf0cbf..ef0a8fd 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -1,8 +1,15 @@ -import pytest +import os +import sys + +# Get the parent directory +parent_dir = os.path.dirname(os.path.realpath(__file__)) + +# Add the parent directory to sys.path +sys.path.append(parent_dir) + from src.models.predict_model import predict,SentiBites from src.models.train_model import pre_processing, train import datasets -import pandas as pd MODEL = 'models/SentiBites' diff --git a/tox.ini b/tox.ini deleted file mode 100755 index c32fbd8..0000000 --- a/tox.ini +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 79 -max-complexity = 10