From 84137138525fce2fca0dba9b4eea5cc2c9a75658 Mon Sep 17 00:00:00 2001
From: lis-r-barreto <lis_barreto@outlook.com>
Date: Sun, 23 Jun 2024 18:43:14 -0300
Subject: [PATCH] feat: Add dagshub token

---
 .github/workflows/pipeline.yml | 21 -----------
 app/train.py                   | 64 ++++++++++++++++++----------------
 tests/test_train.py            | 24 ++++++-------
 3 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml
index 1b26fca..958cbc0 100644
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -31,27 +31,6 @@ jobs:
         run: |
           pytest tests/test_train.py
 
-  check_python_code_style:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v1
-        with:
-          fetch-depth: 0
-
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v1
-        with:
-          python-version: 3.11
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-
-      - name: Validate Python Code Style
-        run: |
-          pycodestyle .
-
   train_pipeline:
       runs-on: ubuntu-latest
       needs: [test_train]
diff --git a/app/train.py b/app/train.py
index aabb282..fa719b8 100644
--- a/app/train.py
+++ b/app/train.py
@@ -24,7 +24,7 @@ def reset_seeds():
     Returns:
         None
     """
-    os.environ['PYTHONHASHSEED'] = str(42)
+    os.environ["PYTHONHASHSEED"] = str(42)
     tf.random.set_seed(42)
     np.random.seed(42)
     random.seed(42)
@@ -39,7 +39,8 @@ def read_data():
         y (pandas.Series): The target vector of shape (n_samples,).
     """
     data = pd.read_csv(
-        'https://raw.githubusercontent.com/lis-r-barreto/mlops-mlflow-classification-experiment/main/data/fetal_health_reduced.csv')
+        "https://raw.githubusercontent.com/lis-r-barreto/mlops-mlflow-classification-experiment/main/data/fetal_health_reduced.csv"
+    )
     X = data.drop(["fetal_health"], axis=1)
     y = data["fetal_health"]
     return X, y
@@ -65,10 +66,9 @@ def process_data(X, y):
     X_df = scaler.fit_transform(X)
     X_df = pd.DataFrame(X_df, columns=columns_names)
 
-    X_train, X_test, y_train, y_test = train_test_split(X_df,
-                                                        y,
-                                                        test_size=0.3,
-                                                        random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X_df, y, test_size=0.3, random_state=42
+    )
 
     y_train = y_train - 1
     y_test = y_test - 1
@@ -89,13 +89,15 @@ def create_model(X):
     reset_seeds()
     model = Sequential()
     model.add(InputLayer(input_shape=(X.shape[1],)))
-    model.add(Dense(10, activation='relu'))
-    model.add(Dense(10, activation='relu'))
-    model.add(Dense(3, activation='softmax'))
-
-    model.compile(loss='sparse_categorical_crossentropy',
-                  optimizer='adam',
-                  metrics=['accuracy'])
+    model.add(Dense(10, activation="relu"))
+    model.add(Dense(10, activation="relu"))
+    model.add(Dense(3, activation="softmax"))
+
+    model.compile(
+        loss="sparse_categorical_crossentropy",
+        optimizer="adam",
+        metrics=["accuracy"],
+    )
     return model
 
 
@@ -119,17 +121,23 @@ def config_mlflow():
     Returns:
         None
     """
-    mlflow_username = os.getenv('MLFLOW_TRACKING_USERNAME')
-    mlflow_password = os.getenv('MLFLOW_TRACKING_PASSWORD')
+    # Credentials come from the environment only; never hard-code them here.
+    mlflow_username = os.getenv("MLFLOW_TRACKING_USERNAME")
+    # The password/token is a secret: supply it via CI secrets or a local env.
+    mlflow_password = os.getenv("MLFLOW_TRACKING_PASSWORD")
     if not mlflow_username or not mlflow_password:
-        raise ValueError("MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD environment variables must be set")
-    os.environ['MLFLOW_TRACKING_USERNAME'] = mlflow_username
-    os.environ['MLFLOW_TRACKING_PASSWORD'] = mlflow_password
-    mlflow.set_tracking_uri('https://dagshub.com/lis-r-barreto/mlops-mlflow-experiments.mlflow')
+        raise ValueError(
+            "MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD environment variables must be set"
+        )
+    os.environ["MLFLOW_TRACKING_USERNAME"] = mlflow_username
+    os.environ["MLFLOW_TRACKING_PASSWORD"] = mlflow_password
+    mlflow.set_tracking_uri(
+        "https://dagshub.com/lis-r-barreto/mlops-mlflow-experiments.mlflow"
+    )
 
-    mlflow.tensorflow.autolog(log_models=True,
-                              log_input_examples=True,
-                              log_model_signatures=True)
+    mlflow.tensorflow.autolog(
+        log_models=True, log_input_examples=True, log_model_signatures=True
+    )
 
 
 def train_model(model, X_train, y_train, is_train=True):
@@ -147,15 +155,11 @@ def train_model(model, X_train, y_train, is_train=True):
     Returns:
     None
     """
-    with mlflow.start_run(run_name='experiment_fetal_health') as run:
-        model.fit(X_train,
-                  y_train,
-                  epochs=50,
-                  validation_split=0.2,
-                  verbose=3)
+    with mlflow.start_run(run_name="experiment_fetal_health") as run:
+        model.fit(X_train, y_train, epochs=50, validation_split=0.2, verbose=3)
     if is_train:
-        run_uri = f'runs:/{run.info.run_id}'
-        mlflow.register_model(run_uri, 'fetal_health')
+        run_uri = f"runs:/{run.info.run_id}"
+        mlflow.register_model(run_uri, "fetal_health")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_train.py b/tests/test_train.py
index c345acc..5484040 100644
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -2,9 +2,7 @@
 import pytest
 from tensorflow.keras.models import Sequential
 
-from app.train import (read_data,
-                   create_model,
-                   train_model)
+from app.train import read_data, create_model, train_model
 
 
 @pytest.fixture
@@ -16,11 +14,13 @@ def sample_data():
         pandas.DataFrame: A DataFrame containing sample data with three columns: 'feature1',
          'feature2', and 'fetal_health'.
     """
-    data = pd.DataFrame({
-        'feature1': [1, 2, 3, 4, 5],
-        'feature2': [6, 7, 8, 9, 10],
-        'fetal_health': [1, 1, 2, 3, 2]
-    })
+    data = pd.DataFrame(
+        {
+            "feature1": [1, 2, 3, 4, 5],
+            "feature2": [6, 7, 8, 9, 10],
+            "fetal_health": [1, 1, 2, 3, 2],
+        }
+    )
     return data
 
 
@@ -66,9 +66,9 @@ def test_train_model(sample_data):
     Returns:
         None
     """
-    X = sample_data.drop(['fetal_health'], axis=1)
-    y = sample_data['fetal_health'] - 1
+    X = sample_data.drop(["fetal_health"], axis=1)
+    y = sample_data["fetal_health"] - 1
     model = create_model(X)
     train_model(model, X, y, is_train=False)
-    assert model.history.history['loss'][-1] > 0
-    assert model.history.history['val_loss'][-1] > 0
+    assert model.history.history["loss"][-1] > 0
+    assert model.history.history["val_loss"][-1] > 0