Skip to content

Commit

Permalink
feat: Add dagshub token
Browse files: browse the repository at this point in the history
  • Loading branch information
lis-r-barreto committed Jun 23, 2024
1 parent 0fcf7a1 commit 8413713
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 63 deletions.
21 changes: 0 additions & 21 deletions .github/workflows/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,27 +31,6 @@ jobs:
run: |
pytest tests/test_train.py
check_python_code_style:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
with:
fetch-depth: 0

- name: Set up Python 3.11
uses: actions/setup-python@v1
with:
python-version: 3.11

- name: Install dependencies
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Validate Python Code Style
run: |
pycodestyle .
train_pipeline:
runs-on: ubuntu-latest
needs: [test_train]
Expand Down
64 changes: 34 additions & 30 deletions app/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def reset_seeds():
Returns:
None
"""
os.environ['PYTHONHASHSEED'] = str(42)
os.environ["PYTHONHASHSEED"] = str(42)
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)
Expand All @@ -39,7 +39,8 @@ def read_data():
y (pandas.Series): The target vector of shape (n_samples,).
"""
data = pd.read_csv(
'https://raw.githubusercontent.com/lis-r-barreto/mlops-mlflow-classification-experiment/main/data/fetal_health_reduced.csv')
"https://raw.githubusercontent.com/lis-r-barreto/mlops-mlflow-classification-experiment/main/data/fetal_health_reduced.csv"
)
X = data.drop(["fetal_health"], axis=1)
y = data["fetal_health"]
return X, y
Expand All @@ -65,10 +66,9 @@ def process_data(X, y):
X_df = scaler.fit_transform(X)
X_df = pd.DataFrame(X_df, columns=columns_names)

X_train, X_test, y_train, y_test = train_test_split(X_df,
y,
test_size=0.3,
random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X_df, y, test_size=0.3, random_state=42
)

y_train = y_train - 1
y_test = y_test - 1
Expand All @@ -89,13 +89,15 @@ def create_model(X):
reset_seeds()
model = Sequential()
model.add(InputLayer(input_shape=(X.shape[1],)))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.add(Dense(10, activation="relu"))
model.add(Dense(10, activation="relu"))
model.add(Dense(3, activation="softmax"))

model.compile(
loss="sparse_categorical_crossentropy",
optimizer="adam",
metrics=["accuracy"],
)
return model


Expand All @@ -119,17 +121,23 @@ def config_mlflow():
Returns:
None
"""
mlflow_username = os.getenv('MLFLOW_TRACKING_USERNAME')
mlflow_password = os.getenv('MLFLOW_TRACKING_PASSWORD')
# mlflow_username = os.getenv("MLFLOW_TRACKING_USERNAME")
mlflow_username = os.getenv("lis-r-barreto")
# mlflow_password = os.getenv("MLFLOW_TRACKING_PASSWORD")
mlflow_password = os.getenv("4e756087b8703385f28b2787fca9832416cc0c83")
if not mlflow_username or not mlflow_password:
raise ValueError("MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD environment variables must be set")
os.environ['MLFLOW_TRACKING_USERNAME'] = mlflow_username
os.environ['MLFLOW_TRACKING_PASSWORD'] = mlflow_password
mlflow.set_tracking_uri('https://dagshub.com/lis-r-barreto/mlops-mlflow-experiments.mlflow')
raise ValueError(
"MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD environment variables must be set"
)
os.environ["MLFLOW_TRACKING_USERNAME"] = mlflow_username
os.environ["MLFLOW_TRACKING_PASSWORD"] = mlflow_password
mlflow.set_tracking_uri(
"https://dagshub.com/lis-r-barreto/mlops-mlflow-experiments.mlflow"
)

mlflow.tensorflow.autolog(log_models=True,
log_input_examples=True,
log_model_signatures=True)
mlflow.tensorflow.autolog(
log_models=True, log_input_examples=True, log_model_signatures=True
)


def train_model(model, X_train, y_train, is_train=True):
Expand All @@ -147,15 +155,11 @@ def train_model(model, X_train, y_train, is_train=True):
Returns:
None
"""
with mlflow.start_run(run_name='experiment_fetal_health') as run:
model.fit(X_train,
y_train,
epochs=50,
validation_split=0.2,
verbose=3)
with mlflow.start_run(run_name="experiment_fetal_health") as run:
model.fit(X_train, y_train, epochs=50, validation_split=0.2, verbose=3)
if is_train:
run_uri = f'runs:/{run.info.run_id}'
mlflow.register_model(run_uri, 'fetal_health')
run_uri = f"runs:/{run.info.run_id}"
mlflow.register_model(run_uri, "fetal_health")


if __name__ == "__main__":
Expand Down
24 changes: 12 additions & 12 deletions tests/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
import pytest
from tensorflow.keras.models import Sequential

from app.train import (read_data,
create_model,
train_model)
from app.train import read_data, create_model, train_model


@pytest.fixture
Expand All @@ -16,11 +14,13 @@ def sample_data():
pandas.DataFrame: A DataFrame containing sample data with three columns: 'feature1',
'feature2', and 'fetal_health'.
"""
data = pd.DataFrame({
'feature1': [1, 2, 3, 4, 5],
'feature2': [6, 7, 8, 9, 10],
'fetal_health': [1, 1, 2, 3, 2]
})
data = pd.DataFrame(
{
"feature1": [1, 2, 3, 4, 5],
"feature2": [6, 7, 8, 9, 10],
"fetal_health": [1, 1, 2, 3, 2],
}
)
return data


Expand Down Expand Up @@ -66,9 +66,9 @@ def test_train_model(sample_data):
Returns:
None
"""
X = sample_data.drop(['fetal_health'], axis=1)
y = sample_data['fetal_health'] - 1
X = sample_data.drop(["fetal_health"], axis=1)
y = sample_data["fetal_health"] - 1
model = create_model(X)
train_model(model, X, y, is_train=False)
assert model.history.history['loss'][-1] > 0
assert model.history.history['val_loss'][-1] > 0
assert model.history.history["loss"][-1] > 0
assert model.history.history["val_loss"][-1] > 0

0 comments on commit 8413713

Please sign in to comment.