- CIFAR-10 - Object Recognition in Images,我的 GitHub Repo 地址
- 比赛数据分为训练数据集和测试数据集。训练集包含 5 万张图片。测试集包含 30 万张图片:其中有 1 万张图片用来计分,但为了防止人工标注测试集,里面另加了 29 万张不计分的图片。
- 两个数据集都是 png 彩色图片,大小为 32×32×3。训练集一共有 10 类图片,分别为飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船和卡车。
trainLabels.csv
:../data/trainLabels.csv
train.7z
:../data/train/[1-50000].png
test.7z
:../data/test/[1-300000].png
- 提交格式
id,label
1,cat
2,cat
3,cat
4,cat
...
- 重新整理数据集(`reorg_cifar10_data`)
import os
import shutil
def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir, valid_ratio):
    """Copy the raw images into per-class folders under ``data_dir/input_dir``.

    The label CSV (header row skipped, one ``id,label`` pair per line) is read
    from ``data_dir/label_file``.  Every file in ``data_dir/train_dir`` is
    copied to ``train_valid/<label>`` and additionally to either
    ``train/<label>`` or ``valid/<label>``.  Every file in
    ``data_dir/test_dir`` is copied to ``test/unknown``.

    Args:
        data_dir: Root directory holding the label file and image folders.
        label_file: Name of the CSV label file inside ``data_dir``.
        train_dir: Name of the folder with training images; the part of each
            file name before the first ``.`` must be the integer image id.
        test_dir: Name of the folder with test images.
        input_dir: Name of the output folder created inside ``data_dir``.
        valid_ratio: Fraction of the training images held out for validation.

    Raises:
        AssertionError: If the number of non-validation images implied by
            ``valid_ratio`` is not strictly between 0 and the total count.
    """
    # Build the image-id -> class-name table, dropping the CSV header row.
    with open(os.path.join(data_dir, label_file), 'r') as csv_file:
        rows = csv_file.readlines()[1:]
    id_to_label = {}
    for row in rows:
        image_id, class_name = row.rstrip().split(',')
        id_to_label[int(image_id)] = class_name
    class_names = set(id_to_label.values())

    source_root = os.path.join(data_dir, train_dir)
    output_root = os.path.join(data_dir, input_dir)
    image_names = os.listdir(source_root)
    total_images = len(image_names)
    tuning_total = int(total_images * (1 - valid_ratio))
    assert 0 < tuning_total < total_images
    # Each class may contribute at most this many images to the train split.
    quota_per_class = tuning_total // len(class_names)
    copied_to_train = {}

    def copy_into(src_path, *subdirs):
        # Create the destination folder on first use, then copy the file in.
        target_dir = os.path.join(output_root, *subdirs)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(src_path, target_dir)

    # Distribute the training images over train_valid / train / valid.
    for file_name in image_names:
        class_name = id_to_label[int(file_name.split('.')[0])]
        src_path = os.path.join(source_root, file_name)
        copy_into(src_path, 'train_valid', class_name)
        already = copied_to_train.get(class_name, 0)
        if class_name in copied_to_train and already >= quota_per_class:
            copy_into(src_path, 'valid', class_name)
        else:
            # The first image of a class always goes to train, even if the
            # per-class quota is zero.
            copy_into(src_path, 'train', class_name)
            copied_to_train[class_name] = already + 1

    # Test images have no labels; they all go under a single 'unknown' class.
    unknown_dir = os.path.join(output_root, 'test', 'unknown')
    if not os.path.exists(unknown_dir):
        os.makedirs(unknown_dir)
    test_root = os.path.join(data_dir, test_dir)
    for file_name in os.listdir(test_root):
        shutil.copy(os.path.join(test_root, file_name), unknown_dir)
- 使用Gluon进行数据增强及图像读取
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np
def transform_train(data, label):
    """Augment and normalise one training sample.

    The image is cast to float32 and divided by 255, passed through every
    augmenter returned by ``image.CreateAugmenter`` (random crop, random
    resize and random mirror enabled; colour jitter, PCA noise and random
    grayscale set to 0; per-channel mean/std normalisation), and finally
    transposed with axes ``(2, 0, 1)``.

    Returns:
        A ``(image, label)`` tuple where the label has been converted to a
        float32 scalar.
    """
    channel_mean = np.array([0.4914, 0.4822, 0.4465])
    channel_std = np.array([0.2023, 0.1994, 0.2010])

    pixels = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32), resize=0,
        rand_crop=True, rand_resize=True, rand_mirror=True,
        mean=channel_mean, std=channel_std,
        brightness=0, contrast=0, saturation=0, hue=0,
        pca_noise=0, rand_gray=0, inter_method=2)
    for augment in augmenters:
        pixels = augment(pixels)

    # Switch the layout from height*width*channel to channel*height*width.
    pixels = nd.transpose(pixels, (2, 0, 1))
    scalar_label = nd.array([label]).asscalar().astype('float32')
    return (pixels, scalar_label)
def transform_test(data, label):
    """Normalise one evaluation sample without any random augmentation.

    At test time the only processing applied is the float32 / 255 scaling,
    the augmenters ``image.CreateAugmenter`` returns when given just a
    per-channel mean and std, and the ``(2, 0, 1)`` axis transpose.

    Returns:
        A ``(image, label)`` tuple where the label has been converted to a
        float32 scalar.
    """
    channel_mean = np.array([0.4914, 0.4822, 0.4465])
    channel_std = np.array([0.2023, 0.1994, 0.2010])

    pixels = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32), mean=channel_mean, std=channel_std)
    for augment in augmenters:
        pixels = augment(pixels)

    pixels = nd.transpose(pixels, (2, 0, 1))
    scalar_label = nd.array([label]).asscalar().astype('float32')
    return (pixels, scalar_label)
- ResNet 网络自定义
from mxnet.gluon import nn
from mxnet import nd
class Residual(nn.HybridBlock):
    """Two-convolution residual block with a post-addition ReLU.

    Main path: conv1 -> bn1 -> ReLU -> conv2 -> bn2.  When ``same_shape`` is
    False, conv1 uses stride 2 and the shortcut goes through a 1x1 stride-2
    convolution (``conv3``) so both branches can be added.

    Args:
        channels: Number of output channels of every convolution.
        same_shape: If True the block uses stride 1 and an identity shortcut.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            if same_shape:
                first_stride = 1
            else:
                first_stride = 2
            # Layers are created in the same order as before so that the
            # automatically generated parameter names stay unchanged.
            self.conv1 = nn.Conv2D(
                channels, kernel_size=3, padding=1, strides=first_stride)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                self.conv3 = nn.Conv2D(
                    channels, kernel_size=1, strides=first_stride)

    def hybrid_forward(self, F, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.conv2(branch)
        branch = self.bn2(branch)
        shortcut = x if self.same_shape else self.conv3(x)
        return F.relu(branch + shortcut)
class Residual_v2(nn.HybridBlock):
    """Residual block that applies BatchNorm and ReLU before each convolution.

    Main path: bn1 -> ReLU -> conv1 -> bn2 -> ReLU -> conv2.  The sum with the
    shortcut is returned without a trailing activation.  When ``same_shape``
    is False, conv1 uses stride 2 and the shortcut goes through a 1x1
    stride-2 convolution (``conv3``).

    Args:
        channels: Number of output channels of every convolution.
        same_shape: If True the block uses stride 1 and an identity shortcut.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual_v2, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            if same_shape:
                first_stride = 1
            else:
                first_stride = 2
            # Creation order is kept as-is; generated parameter names depend
            # on it.
            self.conv1 = nn.Conv2D(
                channels, kernel_size=3, padding=1, strides=first_stride)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                self.conv3 = nn.Conv2D(
                    channels, kernel_size=1, strides=first_stride)

    def hybrid_forward(self, F, x):
        """Return ``main_path(x) + shortcut(x)`` with no final activation."""
        branch = F.relu(self.bn1(x))
        branch = self.conv1(branch)
        branch = F.relu(self.bn2(branch))
        branch = self.conv2(branch)
        shortcut = x if self.same_shape else self.conv3(x)
        return branch + shortcut
class Residual_v2_bottleneck(nn.HybridBlock):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions, all bias-free.

    The two inner convolutions use ``channels // 4`` filters and the last one
    expands back to ``channels``.  When ``same_shape`` is False, the 3x3
    convolution uses stride 2 and the shortcut goes through a 1x1 stride-2
    convolution (``conv4``).

    Main path as implemented:
    bn1 -> conv1 -> bn2 -> ReLU -> conv2 -> bn3 -> ReLU -> conv3 -> bn4.

    NOTE(review): there is no ReLU between bn1 and conv1, and bn4 follows
    conv3, so this is not the BN -> ReLU -> conv ordering used by
    ``Residual_v2`` in this file.  Left unchanged on purpose; confirm whether
    the ordering is intentional before altering it.

    Args:
        channels: Number of output channels of the block.
        same_shape: If True the block uses stride 1 and an identity shortcut.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual_v2_bottleneck, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            if same_shape:
                spatial_stride = 1
            else:
                spatial_stride = 2
            squeezed = channels // 4
            # Creation order is kept as-is; generated parameter names depend
            # on it.
            self.bn1 = nn.BatchNorm()
            self.conv1 = nn.Conv2D(
                channels=squeezed, kernel_size=1, use_bias=False)
            self.bn2 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(
                channels=squeezed, kernel_size=3, padding=1,
                strides=spatial_stride, use_bias=False)
            self.bn3 = nn.BatchNorm()
            self.conv3 = nn.Conv2D(
                channels=channels, kernel_size=1, use_bias=False)
            self.bn4 = nn.BatchNorm()
            if not same_shape:
                self.conv4 = nn.Conv2D(
                    channels=channels, kernel_size=1,
                    strides=spatial_stride, use_bias=False)

    def hybrid_forward(self, F, x):
        """Return ``main_path(x) + shortcut(x)`` with no final activation."""
        branch = self.bn1(x)
        branch = self.conv1(branch)
        branch = F.relu(self.bn2(branch))
        branch = self.conv2(branch)
        branch = F.relu(self.bn3(branch))
        branch = self.conv3(branch)
        branch = self.bn4(branch)
        shortcut = x if self.same_shape else self.conv4(x)
        return branch + shortcut
class ResNet(nn.HybridBlock):
    """Small ResNet built from ``Residual`` blocks.

    Layout: a 3x3 conv / BatchNorm / ReLU stem with 32 channels, three
    32-channel residual blocks, then two stages (64 and 128 channels) that
    each start with a down-sampling block followed by two same-shape blocks,
    and finally 8x8 average pooling, flatten and a dense classifier.

    Args:
        num_classes: Number of units of the final dense layer.
        verbose: If True, print the output shape of every layer during the
            forward pass.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            layers = nn.HybridSequential()
            self.net = layers
            # Stem.
            layers.add(nn.Conv2D(channels=32, kernel_size=3, strides=1, padding=1))
            layers.add(nn.BatchNorm())
            layers.add(nn.Activation(activation='relu'))
            # First stage keeps the resolution of the stem.
            for _ in range(3):
                layers.add(Residual(channels=32))
            # Remaining stages: one down-sampling block, then two regular ones.
            for width in (64, 128):
                layers.add(Residual(channels=width, same_shape=False))
                for _ in range(2):
                    layers.add(Residual(channels=width))
            # Classifier head.
            layers.add(nn.AvgPool2D(pool_size=8))
            layers.add(nn.Flatten())
            layers.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Run ``x`` through every layer in order and return the last output."""
        activations = x
        for position, layer in enumerate(self.net, 1):
            activations = layer(activations)
            if self.verbose:
                print('Block %d output: %s' % (position, activations.shape))
        return activations
class ResNet164_v2(nn.HybridBlock):
    """Deep ResNet built from ``Residual_v2_bottleneck`` blocks.

    Layout: a bias-free 3x3 stem convolution with 64 channels, 27 bottleneck
    blocks at 64 channels, then two stages (128 and 256 channels) that each
    start with a down-sampling block followed by 26 same-shape blocks, and
    finally BatchNorm, ReLU, 8x8 average pooling, flatten and a dense
    classifier.

    NOTE(review): 81 bottleneck blocks x 3 main-path convolutions + the stem
    convolution + the dense layer = 245 weighted layers, whereas the class
    name says 164 (which would correspond to 18 blocks per stage).  Confirm
    the intended depth.

    Args:
        num_classes: Number of units of the final dense layer.
        verbose: If True, print the output shape of every layer during the
            forward pass.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet164_v2, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            layers = nn.HybridSequential()
            self.net = layers
            # Stem.
            layers.add(nn.Conv2D(channels=64, kernel_size=3, strides=1,
                                 padding=1, use_bias=False))
            # First stage keeps the resolution of the stem.
            for _ in range(27):
                layers.add(Residual_v2_bottleneck(channels=64))
            # Remaining stages: one down-sampling block, then 26 regular ones.
            for width in (128, 256):
                layers.add(Residual_v2_bottleneck(channels=width, same_shape=False))
                for _ in range(26):
                    layers.add(Residual_v2_bottleneck(channels=width))
            # Classifier head.
            layers.add(nn.BatchNorm())
            layers.add(nn.Activation(activation='relu'))
            layers.add(nn.AvgPool2D(pool_size=8))
            layers.add(nn.Flatten())
            layers.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Run ``x`` through every layer in order and return the last output."""
        activations = x
        for position, layer in enumerate(self.net, 1):
            activations = layer(activations)
            if self.verbose:
                print('Block %d output: %s' % (position, activations.shape))
        return activations
- 训练模型(`train`)
import datetime
def accuracy(output, label):
    """Return the mean of ``argmax(output, axis=1) == label`` as a scalar."""
    predicted = output.argmax(axis=1)
    matches = predicted == label
    return nd.mean(matches).asscalar()
def evaluate_accuracy(data_iterator, net, ctx=None):
    """Return the fraction of samples in ``data_iterator`` that ``net`` gets right.

    Args:
        data_iterator: Iterable of batches.  If it is an ``mx.io.MXDataIter``
            it is reset before iteration.
        net: Callable model; the prediction is the argmax of its output
            along axis 1.
        ctx: A single ``mx.Context`` or a list of contexts.  Defaults to
            ``[mx.cpu()]`` when omitted.

    Returns:
        Number of correct predictions divided by the number of labels seen.
        There is no guard for an empty iterator: the final division is then
        by a zero sample count.
    """
    # A None sentinel replaces the former list default so that no mutable
    # object (and no context) is created at function-definition time.
    if ctx is None:
        ctx = [mx.cpu()]
    elif isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc = nd.array([0])
    n = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        # _get_batch is defined outside this block; it must return a
        # (data parts, label parts, batch size) triple.  The batch size is
        # not needed here.
        data, label, _ = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            acc += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu())
            n += y.size
        acc.wait_to_read()  # don't push too many operators into backend
    return acc.asscalar() / n
def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period, lr_decay):
    """Train ``net`` with SGD (momentum 0.9) and print one summary line per epoch.

    Besides its parameters, this function reads three names from the
    enclosing scope: ``softmax_cross_entropy`` (the loss callable),
    ``batch_size`` (passed to ``trainer.step``) and ``utils`` (providing
    ``accuracy`` and ``evaluate_accuracy``).

    Args:
        net: Model to train; called on each batch moved to ``ctx``.
        train_data: Sized iterable of ``(data, label)`` batches.
        valid_data: Validation data passed to ``utils.evaluate_accuracy``,
            or None to skip validation.
        num_epochs: Number of passes over ``train_data``.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Context on which data, labels and the forward pass are placed.
        lr_period: The learning rate is multiplied by ``lr_decay`` at every
            epoch that is a positive multiple of this value.
        lr_decay: Multiplicative learning-rate decay factor.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd',
        {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils.accuracy(output, label)
        cur_time = datetime.datetime.now()
        # total_seconds() includes whole days; timedelta.seconds alone would
        # wrap around for an epoch that takes longer than 24 hours.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        num_batches = len(train_data)
        if valid_data is not None:
            valid_acc = utils.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                         % (epoch, train_loss / num_batches,
                            train_acc / num_batches, valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss / num_batches,
                            train_acc / num_batches))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
- 参数:
超参数 | 值 |
---|---|
aug | rand_crop, rand_resize, rand_mirror |
net | ResNet164_v2(use Residual_v2_bottleneck) |
num_epochs | 300 |
learning_rate | 0.1 |
weight_decay | 1e-4 |
lr_period | 120 |
lr_decay | 0.1 |
- ResNet-164-V2 网络模型最终达到 95.27% 的准确率,Leaderboard 成绩第二(榜单已不再更新)。