Commit 8e97387

Add example for Composer integration

Lothiraldan committed Jun 25, 2024
1 parent 5dbd01c

Showing 4 changed files with 183 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test-examples.yml
@@ -107,6 +107,7 @@ jobs:
- {script: "integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py", arg: ""}
- {script: "integrations/model-training/keras/keras-mnist-dnn/keras-mnist-dnn.py", arg: ""}
- {script: "integrations/model-training/mlflow/mlflow-hello-world/mlflow-hello-world.py", arg: "run"}
- {script: "integrations/model-training/mosaicml/mosaicml-getting-started/mosaicml-getting-started.py", arg: ""}
- {script: "integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/pytorch-lightning-optimizer.py", arg: ""}
- {script: "integrations/model-training/pytorch/pytorch-mnist/pytorch-mnist-example.py", arg: ""}
- {script: "integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py", arg: ""}
26 changes: 26 additions & 0 deletions integrations/model-training/mosaicml/mosaicml-getting-started/README.md
@@ -0,0 +1,26 @@
# Composer integration with Comet.ml

[Composer](https://github.com/mosaicml/composer) is an open-source deep learning training library by [MosaicML](https://www.mosaicml.com/). Built on top of PyTorch, the Composer library makes it easier to implement distributed training workflows on large-scale clusters.

Instrument Composer with Comet to manage experiments, create dataset versions, and track hyperparameters for faster and easier reproducibility and collaboration.
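
Composer ships with a built-in `CometMLLogger`, so wiring Comet in takes only a couple of lines. Below is a minimal sketch; the `model` and `train_dataloader` names are placeholders for your own ComposerModel and PyTorch DataLoader, not part of this example's files.

```python
import comet_ml  # imported first so Comet can pick up its configuration

from composer import Trainer
from composer.loggers import CometMLLogger

comet_ml.init(project_name="comet-example-mosaicml-getting-started")

trainer = Trainer(
    model=model,                        # placeholder: any ComposerModel
    train_dataloader=train_dataloader,  # placeholder: any PyTorch DataLoader
    max_duration="1ep",
    loggers=CometMLLogger(),            # streams metrics and hyperparameters to Comet
)
trainer.fit()
```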

## See it

Take a look at this [public Comet Project](https://www.comet.com/examples/comet-example-mosaicml-getting-started?utm_source=comet-examples&utm_medium=referral&utm_campaign=github_repo_2023&utm_content=mosaicml).

## Setup

Install dependencies

```bash
python -m pip install -r requirements.txt
```
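
Comet also needs an API key. Assuming the standard Comet configuration flow, either set the `COMET_API_KEY` environment variable or let `comet_ml.init()` prompt for one on the first run.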

## Run the example

This example is based on the [official Getting Started example](https://colab.research.google.com/github/mosaicml/composer/blob/master/examples/getting_started.ipynb#scrollTo=7a7HokeLUFLO). The code trains a ResNet to classify images from the CIFAR-10 dataset.

```bash
python mosaicml-getting-started.py
```
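
On the first run the script downloads CIFAR-10 (roughly 170 MB) into `./data`. Training lasts three epochs and runs on the GPU when one is available, falling back to CPU otherwise.
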
153 changes: 153 additions & 0 deletions integrations/model-training/mosaicml/mosaicml-getting-started/mosaicml-getting-started.py
@@ -0,0 +1,153 @@
# coding: utf-8
import comet_ml

import composer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from composer.loggers import CometMLLogger
from composer.models import ComposerClassifier
from torchvision import datasets, transforms

comet_ml.init(project_name="comet-example-mosaicml-getting-started")
torch.manual_seed(42) # For replicability

data_directory = "./data"

# Normalization constants
mean = (0.507, 0.487, 0.441)
std = (0.267, 0.256, 0.276)

batch_size = 1024

cifar10_transforms = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize(mean, std)]
)

train_dataset = datasets.CIFAR10(
data_directory, train=True, download=True, transform=cifar10_transforms
)
test_dataset = datasets.CIFAR10(
data_directory, train=False, download=True, transform=cifar10_transforms
)

# Our train and test dataloaders are PyTorch DataLoader objects!
train_dataloader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
test_dataset, batch_size=batch_size, shuffle=True
)


class Block(nn.Module):
"""A ResNet block."""

def __init__(self, f_in: int, f_out: int, downsample: bool = False):
super(Block, self).__init__()

stride = 2 if downsample else 1
self.conv1 = nn.Conv2d(
f_in, f_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.bn1 = nn.BatchNorm2d(f_out)
self.conv2 = nn.Conv2d(
f_out, f_out, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn2 = nn.BatchNorm2d(f_out)
self.relu = nn.ReLU(inplace=True)

        # Shortcut connection: a 1x1 convolution when the shape changes, identity otherwise.
if downsample or f_in != f_out:
self.shortcut = nn.Sequential(
nn.Conv2d(f_in, f_out, kernel_size=1, stride=2, bias=False),
nn.BatchNorm2d(f_out),
)
else:
self.shortcut = nn.Sequential()

def forward(self, x: torch.Tensor):
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
return self.relu(out)


class ResNetCIFAR(nn.Module):
"""A residual neural network as originally designed for CIFAR-10."""

def __init__(self, outputs: int = 10):
super(ResNetCIFAR, self).__init__()

depth = 56
width = 16
num_blocks = (depth - 2) // 6

plan = [(width, num_blocks), (2 * width, num_blocks), (4 * width, num_blocks)]

self.num_classes = outputs

# Initial convolution.
current_filters = plan[0][0]
self.conv = nn.Conv2d(
3, current_filters, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn = nn.BatchNorm2d(current_filters)
self.relu = nn.ReLU(inplace=True)

# The subsequent blocks of the ResNet.
blocks = []
for segment_index, (filters, num_blocks) in enumerate(plan):
for block_index in range(num_blocks):
downsample = segment_index > 0 and block_index == 0
blocks.append(Block(current_filters, filters, downsample))
current_filters = filters

self.blocks = nn.Sequential(*blocks)

# Final fc layer. Size = number of filters in last segment.
self.fc = nn.Linear(plan[-1][0], outputs)
self.criterion = nn.CrossEntropyLoss()

def forward(self, x: torch.Tensor):
out = self.relu(self.bn(self.conv(x)))
out = self.blocks(out)
out = F.avg_pool2d(out, out.size()[3])
out = out.view(out.size(0), -1)
out = self.fc(out)
return out


model = ComposerClassifier(module=ResNetCIFAR(), num_classes=10)

optimizer = composer.optim.DecoupledSGDW(
model.parameters(), # Model parameters to update
lr=0.05, # Peak learning rate
momentum=0.9,
weight_decay=2.0e-3,
)

lr_scheduler = composer.optim.LinearWithWarmupScheduler(
t_warmup="1ep", # Warm up over 1 epoch
alpha_i=1.0, # Flat LR schedule achieved by having alpha_i == alpha_f
alpha_f=1.0,
)

# CometMLLogger creates a Comet Experiment and streams Composer's metrics
# and hyperparameters to the project configured by comet_ml.init() above.
logger_for_baseline = CometMLLogger()

train_epochs = "3ep"
device = "gpu" if torch.cuda.is_available() else "cpu"

trainer = composer.trainer.Trainer(
model=model,
train_dataloader=train_dataloader,
eval_dataloader=test_dataloader,
max_duration=train_epochs,
optimizers=optimizer,
schedulers=lr_scheduler,
device=device,
loggers=logger_for_baseline,
)

trainer.fit() # <-- Your training loop in action!
3 changes: 3 additions & 0 deletions integrations/model-training/mosaicml/mosaicml-getting-started/requirements.txt
@@ -0,0 +1,3 @@
comet_ml
matplotlib
mosaicml
