diff --git a/integrations/model-training/hugging_face/notebooks/Comet_with_Hugging_Face_Trainer.ipynb b/integrations/model-training/hugging_face/notebooks/Comet_with_Hugging_Face_Trainer.ipynb index 978b22ab..69c77eab 100644 --- a/integrations/model-training/hugging_face/notebooks/Comet_with_Hugging_Face_Trainer.ipynb +++ b/integrations/model-training/hugging_face/notebooks/Comet_with_Hugging_Face_Trainer.ipynb @@ -242,7 +242,7 @@ "\n", "\n", "def compute_metrics(pred):\n", - " experiment = comet_ml.get_global_experiment()\n", + " experiment = comet_ml.get_running_experiment()\n", "\n", " labels = pred.label_ids\n", " preds = pred.predictions.argmax(-1)\n", diff --git a/integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py b/integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py index 03634bae..988edbf7 100644 --- a/integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py +++ b/integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py @@ -51,7 +51,7 @@ def preprocess(texts, labels): def compute_metrics(pred): - experiment = comet_ml.get_global_experiment() + experiment = comet_ml.get_running_experiment() labels = pred.label_ids preds = pred.predictions.argmax(-1) diff --git a/integrations/model-training/mlflow/notebooks/Comet_and_MLFlow.ipynb b/integrations/model-training/mlflow/notebooks/Comet_and_MLFlow.ipynb index 218efbae..3a6eb383 100644 --- a/integrations/model-training/mlflow/notebooks/Comet_and_MLFlow.ipynb +++ b/integrations/model-training/mlflow/notebooks/Comet_and_MLFlow.ipynb @@ -1,5 +1,29 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Comet](https://www.comet.com/site/products/ml-experiment-tracking/) is 
an MLOps Platform that is designed to help Data Scientists and Teams build better models faster! Comet provides tooling to track, explain, manage, and monitor your models in a single place! It works with Jupyter Notebooks and Scripts and, most importantly, it's 100% free to get started!\n", + "\n", + "[MLflow](https://github.com/mlflow/mlflow) is an open-source platform for the machine learning lifecycle.\n", + "\n", + "Instrument MLflow with Comet to start managing experiments, creating dataset versions, and tracking hyperparameters for faster and easier reproducibility and collaboration.\n", + "\n", + "[Find more information about our integration with MLflow](https://www.comet.com/docs/v2/integrations/ml-frameworks/mlflow/)\n", + "\n", + "Curious about how Comet can help you build better models, faster? Find out more about [Comet](https://www.comet.com/site/products/ml-experiment-tracking/) and our [other integrations](https://www.comet.com/docs/v2/integrations/overview/).\n", + "\n", + "Get a preview of what's to come. Check out a completed experiment created from this notebook [here](https://www.comet.com/examples/comet-example-mlflow-notebook/5b41a47a2f424209a48e38c96619bbcb)." + ] + }, { "cell_type": "markdown", "metadata": { @@ -17,7 +41,7 @@ }, "outputs": [], "source": [ - "%pip install -U \"comet_ml>=3.44.0\" mlflow" + "%pip install -U \"comet_ml>=3.44.0\" mlflow keras tensorflow" ] }, { @@ -42,118 +66,130 @@ "comet_ml.login(project_name=\"comet-example-mlflow-notebook\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# You can use 'tensorflow', 'torch' or 'jax' as backend. 
Make sure to set the environment variable before importing.\n", + "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import keras\n", + "import numpy as np\n", + "\n", + "import mlflow.keras" + ] + }, { "cell_type": "markdown", "metadata": { "id": "1R-zIOmm2gJP" }, "source": [ - "# Run MLFlow" + "# Load Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", + "x_train = np.expand_dims(x_train, axis=3)\n", + "x_test = np.expand_dims(x_test, axis=3)\n", + "x_train[0].shape" ] }, { "cell_type": "markdown", - "metadata": { - "id": "u-a86wIo3mfj" - }, + "metadata": {}, "source": [ - "Once Comet is imported at the top of your script, it will automatically log experiment data from your MLFlow runs" + "# Build Model " ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "ljuZ8I_q2ZgX" - }, + "metadata": {}, "outputs": [], "source": [ - "import keras\n", - "\n", - "# The following import and function call are the only additions to code required\n", - "# to automatically log metrics and parameters to MLflow.\n", - "import mlflow\n", - "import mlflow.keras\n", + "NUM_CLASSES = 10\n", + "INPUT_SHAPE = (28, 28, 1)\n", "\n", - "import numpy as np\n", - "from keras.datasets import reuters\n", - "from keras.layers import Activation, Dense, Dropout\n", - "from keras.models import Sequential\n", - "from keras.preprocessing.text import Tokenizer\n", "\n", - "# The sqlite store is needed for the model registry\n", - "mlflow.set_tracking_uri(\"sqlite:///db.sqlite\")\n", + "def initialize_model():\n", + " return keras.Sequential(\n", + " [\n", + " keras.Input(shape=INPUT_SHAPE),\n", + " keras.layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n", + " keras.layers.Conv2D(32, kernel_size=(3, 3), 
activation=\"relu\"),\n", + " keras.layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n", + " keras.layers.GlobalAveragePooling2D(),\n", + " keras.layers.Dense(NUM_CLASSES, activation=\"softmax\"),\n", + " ]\n", + " )\n", "\n", - "# We need to create a run before calling keras or MLFlow will end the run by itself\n", - "mlflow.start_run()\n", "\n", - "mlflow.keras.autolog()\n", + "model = initialize_model()\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BATCH_SIZE = 64 # adjust this based on the memory of your machine\n", + "EPOCHS = 3\n", "\n", - "max_words = 1000\n", - "batch_size = 32\n", - "epochs = 5\n", + "model = initialize_model()\n", "\n", - "print(\"Loading data...\")\n", - "(x_train, y_train), (x_test, y_test) = reuters.load_data(\n", - " num_words=max_words, test_split=0.2\n", + "model.compile(\n", + " loss=keras.losses.SparseCategoricalCrossentropy(),\n", + " optimizer=keras.optimizers.Adam(),\n", + " metrics=[\"accuracy\"],\n", ")\n", "\n", - "print(len(x_train), \"train sequences\")\n", - "print(len(x_test), \"test sequences\")\n", - "\n", - "num_classes = np.max(y_train) + 1\n", - "print(num_classes, \"classes\")\n", - "\n", - "print(\"Vectorizing sequence data...\")\n", - "tokenizer = Tokenizer(num_words=max_words)\n", - "x_train = tokenizer.sequences_to_matrix(x_train, mode=\"binary\")\n", - "x_test = tokenizer.sequences_to_matrix(x_test, mode=\"binary\")\n", - "print(\"x_train shape:\", x_train.shape)\n", - "print(\"x_test shape:\", x_test.shape)\n", - "\n", - "print(\n", - " \"Convert class vector to binary class matrix \"\n", - " \"(for use with categorical_crossentropy)\"\n", - ")\n", - "y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)\n", - "y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)\n", - "print(\"y_train 
shape:\", y_train.shape)\n", - "print(\"y_test shape:\", y_test.shape)\n", - "\n", - "print(\"Building model...\")\n", - "model = Sequential()\n", - "model.add(Dense(512, input_shape=(max_words,)))\n", - "model.add(Activation(\"relu\"))\n", - "model.add(Dropout(0.5))\n", - "model.add(Dense(num_classes))\n", - "model.add(Activation(\"softmax\"))\n", - "\n", - "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n", - "\n", - "history = model.fit(\n", + "run = mlflow.start_run()\n", + "model.fit(\n", " x_train,\n", " y_train,\n", - " batch_size=batch_size,\n", - " epochs=epochs,\n", - " verbose=1,\n", + " batch_size=BATCH_SIZE,\n", + " epochs=EPOCHS,\n", " validation_split=0.1,\n", + " callbacks=[mlflow.keras.MlflowCallback(run)],\n", ")\n", - "score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)\n", - "print(\"Test score:\", score[0])\n", - "print(\"Test accuracy:\", score[1])\n", "\n", "mlflow.keras.log_model(model, \"model\", registered_model_name=\"Test Model\")\n", + "\n", "mlflow.end_run()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sVz6748M6R8_" - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -161,13 +197,23 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 } diff --git a/integrations/model-training/sagemaker/log_custom_scripts/huggingface-text-classification/src/text_classification.py b/integrations/model-training/sagemaker/log_custom_scripts/huggingface-text-classification/src/text_classification.py index 
08311230..f02cb4cc 100644 --- a/integrations/model-training/sagemaker/log_custom_scripts/huggingface-text-classification/src/text_classification.py +++ b/integrations/model-training/sagemaker/log_custom_scripts/huggingface-text-classification/src/text_classification.py @@ -1,17 +1,22 @@ +# coding: utf-8 + import argparse import json import logging import os -import random import sys import comet_ml -import torch + from datasets import load_from_disk from sklearn.metrics import accuracy_score, precision_recall_fscore_support -from transformers import (AutoModelForSequenceClassification, AutoTokenizer, - Trainer, TrainingArguments) +from transformers import ( + AutoModelForSequenceClassification, + AutoTokenizer, + Trainer, + TrainingArguments, +) if __name__ == "__main__": @@ -21,7 +26,8 @@ parser = argparse.ArgumentParser() - # hyperparameters sent by the client are passed as command-line arguments to the script. + # hyperparameters sent by the client are passed as command-line arguments + # to the script parser.add_argument("--epochs", type=int, default=3) parser.add_argument("--train_batch_size", type=int, default=32) parser.add_argument("--eval_batch_size", type=int, default=64) @@ -71,11 +77,11 @@ def compute_metrics(pred): return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall} def log_metrics(results): - experiment = comet_ml.get_global_experiment() + experiment = comet_ml.get_running_experiment() experiment.log_metrics(results) def log_sagemaker_metadata(): - experiment = comet_ml.get_global_experiment() + experiment = comet_ml.get_running_experiment() experiment.log_others(SM_TRAINING_ENV) def _get_model_metadata(): @@ -83,7 +89,7 @@ def _get_model_metadata(): return {"model_uri": model_uri} def log_model(name, model): - experiment = comet_ml.get_global_experiment() + experiment = comet_ml.get_running_experiment() comet_ml.integration.pytorch.log_model( experiment, model, name, metadata=_get_model_metadata() ) @@ -124,7 +130,7 @@ def 
log_model(name, model): # writes eval result to file which can be accessed later in s3 ouput with open(os.path.join(args.output_data_dir, "eval_results.txt"), "w") as writer: - print(f"***** Eval results *****") + print("***** Eval results *****") for key, value in sorted(eval_result.items()): writer.write(f"{key} = {value}\n")