Add example for Composer integration #181

Merged 2 commits on Jun 25, 2024

1 change: 1 addition & 0 deletions .github/workflows/test-examples.yml

@@ -107,6 +107,7 @@ jobs:
- {script: "integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py", arg: ""}
- {script: "integrations/model-training/keras/keras-mnist-dnn/keras-mnist-dnn.py", arg: ""}
- {script: "integrations/model-training/mlflow/mlflow-hello-world/mlflow-hello-world.py", arg: "run"}
- {script: "integrations/model-training/mosaicml/mosaicml-getting-started/mosaicml-getting-started.py", arg: ""}
- {script: "integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/pytorch-lightning-optimizer.py", arg: ""}
- {script: "integrations/model-training/pytorch/pytorch-mnist/pytorch-mnist-example.py", arg: ""}
- {script: "integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py", arg: ""}
@@ -0,0 +1,26 @@
# Composer integration with Comet.ml

[Composer](https://github.com/mosaicml/composer) is an open-source deep learning training library by [MosaicML](https://www.mosaicml.com/). Built on top of PyTorch, the Composer library makes it easier to implement distributed training workflows on large-scale clusters.

Instrument Composer with Comet to manage experiments, create dataset versions, and track hyperparameters for faster and easier reproducibility and collaboration.
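
At a glance, the integration amounts to importing `comet_ml` before Composer and passing a `CometMLLogger` to the `Trainer`. Here is a minimal sketch; `model` and `train_dataloader` are placeholders for your own ComposerModel and DataLoader:

```python
import comet_ml  # import before composer so Comet can hook into the run

from composer import Trainer
from composer.loggers import CometMLLogger

comet_logger = CometMLLogger()  # uses your configured Comet credentials

trainer = Trainer(
    model=model,                        # placeholder: any ComposerModel
    train_dataloader=train_dataloader,  # placeholder: a PyTorch DataLoader
    max_duration="1ep",                 # train for one epoch
    loggers=comet_logger,               # metrics and hyperparameters go to Comet
)
trainer.fit()
```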

## See it

Take a look at this [public Comet Project](https://www.comet.com/examples/comet-example-mosaicml-getting-started).

## Setup

Install the dependencies:

```bash
python -m pip install -r requirements.txt
```
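
The script sets the Comet project name itself, so if Comet is not configured on your machine yet you only need to provide your API key, for example through an environment variable (placeholder value shown):

```bash
export COMET_API_KEY=<Your API Key>
```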

## Run the example

This example is based on the [official Getting Started example](https://colab.research.google.com/github/mosaicml/composer/blob/master/examples/getting_started.ipynb). The code trains a ResNet to classify images from the CIFAR-10 dataset.

```bash
python mosaicml-getting-started.py
```
@@ -0,0 +1,153 @@
# coding: utf-8
import comet_ml

import composer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from composer.loggers import CometMLLogger
from composer.models import ComposerClassifier
from torchvision import datasets, transforms

comet_ml.init(project_name="comet-example-mosaicml-getting-started")
torch.manual_seed(42) # For replicability

data_directory = "./data"

# CIFAR-10 per-channel normalization constants
mean = (0.507, 0.487, 0.441)
std = (0.267, 0.256, 0.276)

batch_size = 1024

cifar10_transforms = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize(mean, std)]
)

train_dataset = datasets.CIFAR10(
data_directory, train=True, download=True, transform=cifar10_transforms
)
test_dataset = datasets.CIFAR10(
data_directory, train=False, download=True, transform=cifar10_transforms
)

# Our train and test dataloaders are PyTorch DataLoader objects!
train_dataloader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
test_dataset, batch_size=batch_size, shuffle=True
)


class Block(nn.Module):
"""A ResNet block."""

def __init__(self, f_in: int, f_out: int, downsample: bool = False):
super(Block, self).__init__()

stride = 2 if downsample else 1
self.conv1 = nn.Conv2d(
f_in, f_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.bn1 = nn.BatchNorm2d(f_out)
self.conv2 = nn.Conv2d(
f_out, f_out, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn2 = nn.BatchNorm2d(f_out)
self.relu = nn.ReLU(inplace=True)

    # Shortcut connection: a 1x1 convolution when the shape changes, identity otherwise.
if downsample or f_in != f_out:
self.shortcut = nn.Sequential(
nn.Conv2d(f_in, f_out, kernel_size=1, stride=2, bias=False),
nn.BatchNorm2d(f_out),
)
else:
self.shortcut = nn.Sequential()

def forward(self, x: torch.Tensor):
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
return self.relu(out)


class ResNetCIFAR(nn.Module):
"""A residual neural network as originally designed for CIFAR-10."""

def __init__(self, outputs: int = 10):
super(ResNetCIFAR, self).__init__()

depth = 56
width = 16
num_blocks = (depth - 2) // 6

plan = [(width, num_blocks), (2 * width, num_blocks), (4 * width, num_blocks)]

self.num_classes = outputs

# Initial convolution.
current_filters = plan[0][0]
self.conv = nn.Conv2d(
3, current_filters, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn = nn.BatchNorm2d(current_filters)
self.relu = nn.ReLU(inplace=True)

# The subsequent blocks of the ResNet.
blocks = []
for segment_index, (filters, num_blocks) in enumerate(plan):
for block_index in range(num_blocks):
downsample = segment_index > 0 and block_index == 0
blocks.append(Block(current_filters, filters, downsample))
current_filters = filters

self.blocks = nn.Sequential(*blocks)

# Final fc layer. Size = number of filters in last segment.
self.fc = nn.Linear(plan[-1][0], outputs)
self.criterion = nn.CrossEntropyLoss()

def forward(self, x: torch.Tensor):
out = self.relu(self.bn(self.conv(x)))
out = self.blocks(out)
out = F.avg_pool2d(out, out.size()[3])
out = out.view(out.size(0), -1)
out = self.fc(out)
return out


model = ComposerClassifier(module=ResNetCIFAR(), num_classes=10)

optimizer = composer.optim.DecoupledSGDW(
model.parameters(), # Model parameters to update
lr=0.05, # Peak learning rate
momentum=0.9,
weight_decay=2.0e-3,
)

lr_scheduler = composer.optim.LinearWithWarmupScheduler(
t_warmup="1ep", # Warm up over 1 epoch
alpha_i=1.0, # Flat LR schedule achieved by having alpha_i == alpha_f
alpha_f=1.0,
)

logger_for_baseline = CometMLLogger()

train_epochs = "3ep"
device = "gpu" if torch.cuda.is_available() else "cpu"

trainer = composer.trainer.Trainer(
model=model,
train_dataloader=train_dataloader,
eval_dataloader=test_dataloader,
max_duration=train_epochs,
optimizers=optimizer,
schedulers=lr_scheduler,
device=device,
loggers=logger_for_baseline,
)

trainer.fit() # <-- Your training loop in action!
@@ -0,0 +1,3 @@
comet_ml
matplotlib
mosaicml