From 224588e02b05d6b11c9a553fda3d548eadbdff1b Mon Sep 17 00:00:00 2001 From: osmr Date: Fri, 17 Aug 2018 02:51:21 +0300 Subject: [PATCH] After testing the release --- README.md | 2 +- gluon/models/model_store.py | 1 - pytorch/models/model_store.py | 1 - pytorch/models/others/MENet.py | 227 -------------------- pytorch/models/others/layers.py | 259 ----------------------- pytorch/models/others/slim.py | 360 -------------------------------- 6 files changed, 1 insertion(+), 849 deletions(-) delete mode 100644 pytorch/models/others/MENet.py delete mode 100644 pytorch/models/others/layers.py delete mode 100644 pytorch/models/others/slim.py diff --git a/README.md b/README.md index 273fe59fe..5ff0f5086 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ torchvision >= 0.2.1 - ResNet (['Deep Residual Learning for Image Recognition'](https://arxiv.org/abs/1512.03385)) - PreResNet (['Identity Mappings in Deep Residual Networks'](https://arxiv.org/abs/1603.05027)) - DenseNet (['Densely Connected Convolutional Networks'](https://arxiv.org/abs/1608.06993)) -- CondenseNet (['Condense````Net: An Efficient DenseNet using Learned Group Convolutions'](https://arxiv.org/abs/1711.09224)) +- CondenseNet (['CondenseNet: An Efficient DenseNet using Learned Group Convolutions'](https://arxiv.org/abs/1711.09224)) - DarkNet (['Darknet: Open source neural networks in c'](https://github.com/pjreddie/darknet)) - SqueezeNet (['SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size'](https://arxiv.org/abs/1602.07360)) - SqueezeNext (['SqueezeNext: Hardware-Aware Neural Network Design'](https://arxiv.org/abs/1803.10615)) diff --git a/gluon/models/model_store.py b/gluon/models/model_store.py index 59f04c21f..f7d1b08ea 100644 --- a/gluon/models/model_store.py +++ b/gluon/models/model_store.py @@ -47,7 +47,6 @@ ('menet348_12x1_g3', '1141', 'ac69b246629131d77bf5a0a454bda28f5c2e6bc0', 'v0.0.6'), ('menet352_12x1_g8', '1375', '85779b8a576540ec1082a433bd5ea1ab93def27a', 'v0.0.6'), ('menet456_24x1_g3', '1043', '6e777068761f9c45cd0527f3824ad3b5cf36b0b5', 'v0.0.6'), - ('mobilenet_wd4', '2410', 'db312a26033119ad1601fe0300e7c52a11cba93c', 'v0.0.7'), ('mobilenet_wd2', '1537', '5419ccc26dedfbb7242e2f4f7c52b13f94812099', 'v0.0.7'), ('mobilenet_w3d4', '1228', 'dc11727a3917f2c795c9f286ad9cf299a165fe85', 'v0.0.7'), diff --git a/pytorch/models/model_store.py b/pytorch/models/model_store.py index 0a0957d48..522b07dea 100644 --- a/pytorch/models/model_store.py +++ b/pytorch/models/model_store.py @@ -48,7 +48,6 @@ ('menet348_12x1_g3', '1092', '66be1a1896fa0bea27290580e8b98057dfdbda2c', 'v0.0.6'), ('menet352_12x1_g8', '1308', 'e91ec72ce2d0c3c2bf2a3cba6719c6b23ea7c736', 'v0.0.6'), ('menet456_24x1_g3', '0993', 'cb9fd37660b6064f44a6c779a330a967b2b41c2d', 'v0.0.6'), - ('mobilenet_wd4', '2493', 'c05b5fab876300552b1c9b58d82ff98eb755c15b', 'v0.0.7'), ('mobilenet_wd2', '1599', '5883b38d611897bf4b1b49d9eeded2d1868c5c0a', 'v0.0.7'), ('mobilenet_w3d4', '1285', 'b8022faebe280b6e6571bec3a4bb6e293895a72d', 'v0.0.7'), diff --git a/pytorch/models/others/MENet.py b/pytorch/models/others/MENet.py deleted file mode 100644 index b2b2f96ac..000000000 --- a/pytorch/models/others/MENet.py +++ /dev/null @@ -1,227 +0,0 @@ -''' -Merging-and-Evolution Network -''' -import torch -import torch.nn as nn -import torch.nn.functional as F -from collections import OrderedDict -from .common import channel_shuffle - -__all__ = [ - 'menet', - 'oth_menet108_8x1_g3', - 'oth_menet128_8x1_g4', - 'oth_menet160_8x1_g8', - 'oth_menet228_12x1_g3', - 'oth_menet256_12x1_g4', - 'oth_menet348_12x1_g3', - 'oth_menet352_12x1_g8', - 'oth_menet456_24x1_g3', -] - - -def depthwise_conv(c, stride): - return nn.Conv2d(c, c, 3, stride=stride, padding=1, groups=c, bias=False) - - -def group_conv(in_c, out_c, groups): - return nn.Conv2d(in_c, out_c, 1, groups=groups, bias=False) - - -def conv1x1(in_c, out_c): - return nn.Conv2d(in_c, out_c, 1, bias=False) - - -def conv3x3(in_c, out_c, stride): - return nn.Conv2d(in_c, out_c, 3, stride=stride, padding=1, bias=False) - - -class _MEModule(nn.Module): - def __init__(self, in_c, out_c, side_c, downsample, groups, ignore_group): - super(_MEModule, self).__init__() - bott = out_c // 4 - self.downsample = downsample - self.groups = groups - if downsample: - out_c -= in_c - # residual branch - if ignore_group: - self.compress = group_conv(in_c, bott, 1) - else: - self.compress = group_conv(in_c, bott, groups) - self.bn_compress = nn.BatchNorm2d(bott) - self.depthwise = depthwise_conv(bott, 2) - self.bn_depthwise = nn.BatchNorm2d(bott) - self.expand = group_conv(bott, out_c, groups) - self.bn_expand = nn.BatchNorm2d(out_c) - self.pool = nn.AvgPool2d(3, stride=2, padding=1) - # fusion branch - self.s_merge = conv1x1(bott, side_c) - self.s_bn_merge = nn.BatchNorm2d(side_c) - self.s_conv = conv3x3(side_c, side_c, 2) - self.s_bn_conv = nn.BatchNorm2d(side_c) - self.s_evolve = conv1x1(side_c, bott) - self.s_bn_evolve = nn.BatchNorm2d(bott) - else: - # residual branch - self.compress = group_conv(in_c, bott, groups) - self.bn_compress = nn.BatchNorm2d(bott) - self.depthwise = depthwise_conv(bott, 1) - self.bn_depthwise = nn.BatchNorm2d(bott) - self.expand = group_conv(bott, out_c, groups) - self.bn_expand = nn.BatchNorm2d(out_c) - # fusion branch - self.s_merge = conv1x1(bott, side_c) - self.s_bn_merge = nn.BatchNorm2d(side_c) - self.s_conv = conv3x3(side_c, side_c, 1) - self.s_bn_conv = nn.BatchNorm2d(side_c) - self.s_evolve = conv1x1(side_c, bott) - self.s_bn_evolve = nn.BatchNorm2d(bott) - - def forward(self, x): - identity = x - # pointwise group convolution 1 - x = self.compress(x) - x = self.bn_compress(x) - x = F.relu(x, inplace=True) - x = channel_shuffle(x, self.groups) - # merging - y = self.s_merge(x) - y = self.s_bn_merge(y) - y = F.relu(y, inplace=True) - # depthwise convolution (bottleneck) - x = self.depthwise(x) - x = self.bn_depthwise(x) - # evolution - y = self.s_conv(y) - y = self.s_bn_conv(y) - y = F.relu(y, inplace=True) - y = self.s_evolve(y) - y = self.s_bn_evolve(y) - y = F.sigmoid(y) - x *= y - # pointwise group convolution 2 - x = self.expand(x) - x = self.bn_expand(x) - # identity branch - if self.downsample: - identity = self.pool(identity) - x = torch.cat((x, identity), dim=1) - else: - x += identity - x = F.relu(x, inplace=True) - return x - - -class _InitBlock(nn.Module): - def __init__(self, init_c): - super(_InitBlock, self).__init__() - self.conv = conv3x3(3, init_c, 2) - self.bn = nn.BatchNorm2d(init_c) - self.pool = nn.MaxPool2d(3, stride=2, padding=1) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = F.relu(x, inplace=True) - x = self.pool(x) - return x - - -class MENet(nn.Module): - def __init__(self, block_channels, block_layers, init_c, side_channels, groups): - super(MENet, self).__init__() - self.features = nn.Sequential(OrderedDict([ - ('init', _InitBlock(init_c)), - ])) - in_c = init_c - for i, (out_c, num_layers, side_c) in enumerate(zip(block_channels, block_layers, side_channels)): - self.features.add_module( - 'stage_{}_{}'.format(i + 1, 1), - _MEModule(in_c, out_c, side_c, True, groups, (i == 0)) - ) - for _ in range(num_layers): - self.features.add_module( - 'stage_{}_{}'.format(i + 1, _ + 2), - _MEModule(out_c, out_c, side_c, False, groups, False) - ) - in_c = out_c - self.pool = nn.AvgPool2d(7) - self.classifier = nn.Linear(in_c, 1000) - - def forward(self, x): - x = self.features(x) - x = self.pool(x) - x = x.view(x.size(0), -1) - x = self.classifier(x) - #print(tuple(x.size())) - return x - - -def menet(model_config): - block_channels = model_config['block_channels'] - block_layers = model_config['block_layers'] - init_c = model_config['init_c'] - side_channels = model_config['side_channels'] - groups = model_config['groups'] - return MENet(block_channels, block_layers, init_c, side_channels, groups) - - -def oth_menet108_8x1_g3(**kwargs): - return menet({"block_channels": [108, 216, 432], "block_layers": [3, 7, 3], "init_c": 12, - "side_channels": [8, 8, 8], "groups": 3}) - - -def oth_menet128_8x1_g4(**kwargs): - return menet({"block_channels": [128, 256, 512], "block_layers": [3, 7, 3], "init_c": 12, - "side_channels": [8, 8, 8], "groups": 4}) - - -def oth_menet160_8x1_g8(**kwargs): - return menet({"block_channels": [160, 320, 640], "block_layers": [3, 7, 3], "init_c": 16, - "side_channels": [8, 8, 8], "groups": 8}) - - -def oth_menet228_12x1_g3(**kwargs): - return menet({"block_channels": [228, 456, 912], "block_layers": [3, 7, 3], "init_c": 24, - "side_channels": [12, 12, 12], "groups": 3}) - - -def oth_menet256_12x1_g4(**kwargs): - return menet({"block_channels": [256, 512, 1024], "block_layers": [3, 7, 3], "init_c": 24, - "side_channels": [12, 12, 12], "groups": 4}) - - -def oth_menet348_12x1_g3(**kwargs): - return menet({"block_channels": [348, 696, 1392], "block_layers": [3, 7, 3], "init_c": 24, - "side_channels": [12, 12, 12], "groups": 3}) - - -def oth_menet352_12x1_g8(**kwargs): - return menet({"block_channels": [352, 704, 1408], "block_layers": [3, 7, 3], "init_c": 24, - "side_channels": [12, 12, 12], "groups": 8}) - - -def oth_menet456_24x1_g3(**kwargs): - return menet({"block_channels": [456, 912, 1824], "block_layers": [3, 7, 3], "init_c": 48, - "side_channels": [24, 24, 24], "groups": 3}) - - -if __name__ == "__main__": - import numpy as np - import torch - from torch.autograd import Variable - - net = oth_menet456_24x1_g3(num_classes=1000) - - input = Variable(torch.randn(1, 3, 224, 224)) - output = net(input) - #print(output.size()) - #print("net={}".format(net)) - - net.eval() - net_params = filter(lambda p: p.requires_grad, net.parameters()) - weight_count = 0 - for param in net_params: - weight_count += np.prod(param.size()) - print("weight_count={}".format(weight_count)) diff --git a/pytorch/models/others/layers.py b/pytorch/models/others/layers.py deleted file mode 100644 index e9c0c932b..000000000 --- a/pytorch/models/others/layers.py +++ /dev/null @@ -1,259 +0,0 @@ -from __future__ import absolute_import -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division - -import torch -import torch.nn as nn -from torch.autograd import Variable -import torch.nn.functional as F - - -class LearnedGroupConv(nn.Module): - global_progress = 0.0 - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, - padding=0, dilation=1, groups=1, - condense_factor=None, dropout_rate=0.): - super(LearnedGroupConv, self).__init__() - self.norm = nn.BatchNorm2d(in_channels) - self.relu = nn.ReLU(inplace=True) - self.dropout_rate = dropout_rate - if self.dropout_rate > 0: - self.drop = nn.Dropout(dropout_rate, inplace=False) - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, - padding, dilation, groups=1, bias=False) - self.in_channels = in_channels - self.out_channels = out_channels - self.groups = groups - self.condense_factor = condense_factor - if self.condense_factor is None: - self.condense_factor = self.groups - ### Parameters that should be carefully used - self.register_buffer('_count', torch.zeros(1)) - self.register_buffer('_stage', torch.zeros(1)) - self.register_buffer('_mask', torch.ones(self.conv.weight.size())) - ### Check if arguments are valid - assert self.in_channels % self.groups == 0, "group number can not be divided by input channels" - assert self.in_channels % self.condense_factor == 0, "condensation factor can not be divided by input channels" - assert self.out_channels % self.groups == 0, "group number can not be divided by output channels" - - def forward(self, x): - self._check_drop() - x = self.norm(x) - x = self.relu(x) - if self.dropout_rate > 0: - x = self.drop(x) - ### Masked output - weight = self.conv.weight * self.mask - return F.conv2d(x, weight, None, self.conv.stride, - self.conv.padding, self.conv.dilation, 1) - - def _check_drop(self): - progress = LearnedGroupConv.global_progress - delta = 0 - ### Get current stage - for i in range(self.condense_factor - 1): - if progress * 2 < (i + 1) / (self.condense_factor - 1): - stage = i - break - else: - stage = self.condense_factor - 1 - ### Check for dropping - if not self._reach_stage(stage): - self.stage = stage - delta = self.in_channels // self.condense_factor - if delta > 0: - self._dropping(delta) - return - - def _dropping(self, delta): - weight = self.conv.weight * self.mask - ### Sum up all kernels - ### Assume only apply to 1x1 conv to speed up - assert weight.size()[-1] == 1 - weight = weight.abs().squeeze() - assert weight.size()[0] == self.out_channels - assert weight.size()[1] == self.in_channels - d_out = self.out_channels // self.groups - ### Shuffle weight - weight = weight.view(d_out, self.groups, self.in_channels) - weight = weight.transpose(0, 1).contiguous() - weight = weight.view(self.out_channels, self.in_channels) - ### Sort and drop - for i in range(self.groups): - wi = weight[i * d_out:(i + 1) * d_out, :] - ### Take corresponding delta index - di = wi.sum(0).sort()[1][self.count:self.count + delta] - for d in di.data: - self._mask[i::self.groups, d, :, :].fill_(0) - self.count = self.count + delta - - @property - def count(self): - return int(self._count[0]) - - @count.setter - def count(self, val): - self._count.fill_(val) - - @property - def stage(self): - return int(self._stage[0]) - - @stage.setter - def stage(self, val): - self._stage.fill_(val) - - @property - def mask(self): - return Variable(self._mask) - - def _reach_stage(self, stage): - return (self._stage >= stage).all() - - @property - def lasso_loss(self): - if self._reach_stage(self.groups - 1): - return 0 - weight = self.conv.weight * self.mask - ### Assume only apply to 1x1 conv to speed up - assert weight.size()[-1] == 1 - weight = weight.squeeze().pow(2) - d_out = self.out_channels // self.groups - ### Shuffle weight - weight = weight.view(d_out, self.groups, self.in_channels) - weight = weight.sum(0).clamp(min=1e-6).sqrt() - return weight.sum() - - -def ShuffleLayer(x, groups): - batchsize, num_channels, height, width = x.data.size() - channels_per_group = num_channels // groups - ### reshape - x = x.view(batchsize, groups, - channels_per_group, height, width) - ### transpose - x = torch.transpose(x, 1, 2).contiguous() - ### flatten - x = x.view(batchsize, -1, height, width) - return x - - -class CondensingLinear(nn.Module): - def __init__(self, model, drop_rate=0.5): - super(CondensingLinear, self).__init__() - self.in_features = int(model.in_features * drop_rate) - self.out_features = model.out_features - self.linear = nn.Linear(self.in_features, self.out_features) - self.register_buffer('index', torch.LongTensor(self.in_features)) - _, index = model.weight.data.abs().sum(0).sort() - index = index[model.in_features - self.in_features:] - self.linear.bias.data = model.bias.data.clone() - for i in range(self.in_features): - self.index[i] = index[i] - self.linear.weight.data[:, i] = model.weight.data[:, index[i]] - - def forward(self, x): - x = torch.index_select(x, 1, Variable(self.index)) - x = self.linear(x) - return x - - -class CondensingConv(nn.Module): - def __init__(self, model): - super(CondensingConv, self).__init__() - self.in_channels = model.conv.in_channels \ - * model.groups // model.condense_factor - self.out_channels = model.conv.out_channels - self.groups = model.groups - self.condense_factor = model.condense_factor - self.norm = nn.BatchNorm2d(self.in_channels) - self.relu = nn.ReLU(inplace=True) - self.conv = nn.Conv2d(self.in_channels, self.out_channels, - kernel_size=model.conv.kernel_size, - padding=model.conv.padding, - groups=self.groups, - bias=False, - stride=model.conv.stride) - self.register_buffer('index', torch.LongTensor(self.in_channels)) - index = 0 - mask = model._mask.mean(-1).mean(-1) - for i in range(self.groups): - for j in range(model.conv.in_channels): - if index < (self.in_channels // self.groups) * (i + 1) \ - and mask[i, j] == 1: - for k in range(self.out_channels // self.groups): - idx_i = int(k + i * (self.out_channels // self.groups)) - idx_j = index % (self.in_channels // self.groups) - self.conv.weight.data[idx_i, idx_j, :, :] = \ - model.conv.weight.data[int(i + k * self.groups), j, :, :] - self.norm.weight.data[index] = model.norm.weight.data[j] - self.norm.bias.data[index] = model.norm.bias.data[j] - self.norm.running_mean[index] = model.norm.running_mean[j] - self.norm.running_var[index] = model.norm.running_var[j] - self.index[index] = j - index += 1 - - def forward(self, x): - x = torch.index_select(x, 1, Variable(self.index)) - x = self.norm(x) - x = self.relu(x) - x = self.conv(x) - x = ShuffleLayer(x, self.groups) - return x - - -class CondenseLinear(nn.Module): - def __init__(self, in_features, out_features, drop_rate=0.5): - super(CondenseLinear, self).__init__() - self.in_features = int(in_features * drop_rate) - self.out_features = out_features - self.linear = nn.Linear(self.in_features, self.out_features) - self.register_buffer('index', torch.LongTensor(self.in_features)) - - def forward(self, x): - x = torch.index_select(x, 1, Variable(self.index)) - x = self.linear(x) - return x - - -class CondenseConv(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size, - stride=1, padding=0, groups=1): - super(CondenseConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.groups = groups - self.norm = nn.BatchNorm2d(self.in_channels) - self.relu = nn.ReLU(inplace=True) - self.conv = nn.Conv2d(self.in_channels, self.out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=self.groups, - bias=False) - self.register_buffer('index', torch.LongTensor(self.in_channels)) - self.index.fill_(0) - - def forward(self, x): - x = torch.index_select(x, 1, Variable(self.index)) - x = self.norm(x) - x = self.relu(x) - x = self.conv(x) - x = ShuffleLayer(x, self.groups) - return x - - -class Conv(nn.Sequential): - def __init__(self, in_channels, out_channels, kernel_size, - stride=1, padding=0, groups=1): - super(Conv, self).__init__() - self.add_module('norm', nn.BatchNorm2d(in_channels)) - self.add_module('relu', nn.ReLU(inplace=True)) - self.add_module('conv', nn.Conv2d(in_channels, out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, bias=False, - groups=groups)) - diff --git a/pytorch/models/others/slim.py b/pytorch/models/others/slim.py deleted file mode 100644 index 0339cfe57..000000000 --- a/pytorch/models/others/slim.py +++ /dev/null @@ -1,360 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import torch -import torch.nn as nn - -try: - import caffe - from caffe import layers as L - from caffe import params as P -except ImportError: - pass - - -def g_name(g_name, m): - m.g_name = g_name - return m - - -class ChannelShuffle(nn.Module): - def __init__(self, groups): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - x = x.reshape(x.shape[0], self.groups, x.shape[1] // self.groups, x.shape[2], x.shape[3]) - x = x.permute(0, 2, 1, 3, 4) - x = x.reshape(x.shape[0], -1, x.shape[3], x.shape[4]) - return x - - def generate_caffe_prototxt(self, caffe_net, layer): - layer = L.ShuffleChannel(layer, group=self.groups) - caffe_net[self.g_name] = layer - return layer - - -def channel_shuffle(name, groups): - return g_name(name, ChannelShuffle(groups)) - - -class Permute(nn.Module): - def __init__(self, order): - super(Permute, self).__init__() - self.order = order - - def forward(self, x): - x = x.permute(*self.order).contiguous() - return x - - def generate_caffe_prototxt(self, caffe_net, layer): - layer = L.Permute(layer, order=list(self.order)) - caffe_net[self.g_name] = layer - return layer - - -def permute(name, order): - return g_name(name, Permute(order)) - - -class Flatten(nn.Module): - def __init__(self, axis): - super(Flatten, self).__init__() - self.axis = axis - - def forward(self, x): - assert self.axis == 1 - x = x.reshape(x.shape[0], -1) - return x - - def generate_caffe_prototxt(self, caffe_net, layer): - layer = L.Flatten(layer, axis=self.axis) - caffe_net[self.g_name] = layer - return layer - - -def flatten(name, axis): - return g_name(name, Flatten(axis)) - - -def generate_caffe_prototxt(m, caffe_net, layer): - if hasattr(m, 'generate_caffe_prototxt'): - return m.generate_caffe_prototxt(caffe_net, layer) - - if isinstance(m, nn.Sequential): - for module in m: - layer = generate_caffe_prototxt(module, caffe_net, layer) - return layer - - if isinstance(m, nn.Conv2d): - if m.bias is None: - param=[dict(lr_mult=1, decay_mult=1)] - else: - param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)] - assert m.dilation[0] == m.dilation[1] - convolution_param=dict( - num_output=m.out_channels, - group=m.groups, bias_term=(m.bias is not None), - weight_filler=dict(type='msra'), - dilation=m.dilation[0], - ) - if m.kernel_size[0] == m.kernel_size[1]: - convolution_param['kernel_size'] = m.kernel_size[0] - else: - convolution_param['kernel_h'] = m.kernel_size[0] - convolution_param['kernel_w'] = m.kernel_size[1] - if m.stride[0] == m.stride[1]: - convolution_param['stride'] = m.stride[0] - else: - convolution_param['stride_h'] = m.stride[0] - convolution_param['stride_w'] = m.stride[1] - if m.padding[0] == m.padding[1]: - convolution_param['pad'] = m.padding[0] - else: - convolution_param['pad_h'] = m.padding[0] - convolution_param['pad_w'] = m.padding[1] - layer = L.Convolution( - layer, - param=param, - convolution_param=convolution_param, - ) - caffe_net.tops[m.g_name] = layer - return layer - - if isinstance(m, nn.ConvTranspose2d): - if m.bias is None: - param=[dict(lr_mult=1, decay_mult=1)] - else: - param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)] - assert m.dilation[0] == m.dilation[1] - convolution_param=dict( - num_output=m.out_channels, - group=m.groups, bias_term=(m.bias is not None), - weight_filler=dict(type='msra'), - dilation=m.dilation[0], - ) - if m.kernel_size[0] == m.kernel_size[1]: - convolution_param['kernel_size'] = m.kernel_size[0] - else: - convolution_param['kernel_h'] = m.kernel_size[0] - convolution_param['kernel_w'] = m.kernel_size[1] - if m.stride[0] == m.stride[1]: - convolution_param['stride'] = m.stride[0] - else: - convolution_param['stride_h'] = m.stride[0] - convolution_param['stride_w'] = m.stride[1] - if m.padding[0] == m.padding[1]: - convolution_param['pad'] = m.padding[0] - else: - convolution_param['pad_h'] = m.padding[0] - convolution_param['pad_w'] = m.padding[1] - layer = L.Deconvolution( - layer, - param=param, - convolution_param=convolution_param, - ) - caffe_net.tops[m.g_name] = layer - return layer - - if isinstance(m, nn.BatchNorm2d): - layer = L.BatchNorm( - layer, in_place=True, - param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], - ) - caffe_net[m.g_name] = layer - if m.affine: - layer = L.Scale( - layer, in_place=True, bias_term=True, - filler=dict(type='constant', value=1), bias_filler=dict(type='constant', value=0), - param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)], - ) - caffe_net[m.g_name + '/scale'] = layer - return layer - - if isinstance(m, nn.ReLU): - layer = L.ReLU(layer, in_place=True) - caffe_net.tops[m.g_name] = layer - return layer - - if isinstance(m, nn.PReLU): - layer = L.PReLU(layer) - caffe_net.tops[m.g_name] = layer - return layer - - if isinstance(m, nn.AvgPool2d) or isinstance(m, nn.MaxPool2d): - if isinstance(m, nn.AvgPool2d): - pooling_param = dict(pool=P.Pooling.AVE) - else: - pooling_param = dict(pool=P.Pooling.MAX) - if isinstance(m.kernel_size, tuple) or isinstance(m.kernel_size, list): - pooling_param['kernel_h'] = m.kernel_size[0] - pooling_param['kernel_w'] = m.kernel_size[1] - else: - pooling_param['kernel_size'] = m.kernel_size - if isinstance(m.stride, tuple) or isinstance(m.stride, list): - pooling_param['stride_h'] = m.stride[0] - pooling_param['stride_w'] = m.stride[1] - else: - pooling_param['stride'] = m.stride - if isinstance(m.padding, tuple) or isinstance(m.padding, list): - pooling_param['pad_h'] = m.padding[0] - pooling_param['pad_w'] = m.padding[1] - else: - pooling_param['pad'] = m.padding - layer = L.Pooling(layer, pooling_param=pooling_param) - caffe_net.tops[m.g_name] = layer - return layer - raise Exception("Unknow module '%s' to generate caffe prototxt." % m) - - -def convert_pytorch_to_caffe(torch_net, caffe_net): - for name, m in torch_net.named_modules(): - if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): - print('convert conv:', name, m.g_name, m) - caffe_net.params[m.g_name][0].data[...] = m.weight.data.cpu().numpy() - if m.bias is not None: - caffe_net.params[m.g_name][1].data[...] = m.bias.data.cpu().numpy() - if isinstance(m, nn.BatchNorm2d): - print('convert bn:', name, m.g_name, m) - caffe_net.params[m.g_name][0].data[...] = m.running_mean.cpu().numpy() - caffe_net.params[m.g_name][1].data[...] = m.running_var.cpu().numpy() - caffe_net.params[m.g_name][2].data[...] = 1 - if m.affine: - caffe_net.params[m.g_name + '/scale'][0].data[...] = m.weight.data.cpu().numpy() - caffe_net.params[m.g_name + '/scale'][1].data[...] = m.bias.data.cpu().numpy() - - -def conv_bn_relu(name, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1): - return nn.Sequential( - g_name(name, nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, False)), - g_name(name + '/bn', nn.BatchNorm2d(out_channels)), - g_name(name + '/relu', nn.ReLU(inplace=True)), - ) - - -def conv_bn(name, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1): - return nn.Sequential( - g_name(name, nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, False)), - g_name(name + '/bn', nn.BatchNorm2d(out_channels)), - ) - - -def conv(name, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1): - return g_name(name, nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, True)) - - -def conv_relu(name, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1): - return nn.Sequential( - g_name(name, nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, True)), - g_name(name + '/relu', nn.ReLU()), - ) - -def conv_prelu(name, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1): - return nn.Sequential( - g_name(name, nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, True)), - g_name(name + '/prelu', nn.PReLU()), - ) - - -if __name__ == '__main__': - - class BasicBlock(nn.Module): - - def __init__(self, name, in_channels, middle_channels, out_channels, stride, residual): - super(BasicBlock, self).__init__() - self.g_name = name - self.residual = residual - self.conv = [ - conv_bn(name + '/conv1', - in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), - conv_bn_relu(name + '/conv2', in_channels, middle_channels, 1), - conv_bn(name + '/conv3', middle_channels, out_channels, 1), - ] - self.conv = nn.Sequential(*self.conv) - # self.relu = g_name(name + '/relu', nn.ReLU(inplace=True)) - - def forward(self, x): - x = x + self.conv(x) if self.residual else self.conv(x) - # x = self.relu(x) - return x - - def generate_caffe_prototxt(self, caffe_net, layer): - residual_layer = layer - layer = generate_caffe_prototxt(self.conv, caffe_net, layer) - if self.residual: - layer = L.Eltwise(residual_layer, layer, operation=P.Eltwise.SUM) - caffe_net[self.g_name + '/sum'] = layer - # layer = generate_caffe_prototxt(self.relu, caffe_net, layer) - return layer - - - class Network(nn.Module): - - def __init__(self, num_outputs, width_multiplier=32): - super(Network, self).__init__() - - assert width_multiplier >= 0 and width_multiplier <= 256 - # assert width_multiplier % 2 == 0 - - self.network = [ - g_name('data/bn', nn.BatchNorm2d(3)), - conv_bn_relu('stage1/conv', 3, 32, 3, 2, 1), - # g_name('stage1/pool', nn.MaxPool2d(3, 2, 0, ceil_mode=True)), - ] - channel = lambda i: (2**i) * int(width_multiplier) - network_parameters = [ - (32, channel(2) * 4, channel(2), 2, 2), - (channel(2), channel(2) * 4, channel(2), 2, 4), - (channel(2), channel(3) * 4, channel(3), 2, 8), - (channel(3), channel(4) * 4, channel(4), 2, 4), - ] - for i, parameters in enumerate(network_parameters): - in_channels, middle_channels, out_channels, stride, num_blocks = parameters - self.network += [self._generate_stage('stage_{}'.format(i + 2), - in_channels, middle_channels, out_channels, stride, num_blocks)] - self.network += [ - conv_bn_relu('unsqueeze', out_channels, out_channels * 4, 1), - g_name('pool_fc', nn.AvgPool2d(7)), - g_name('fc', nn.Conv2d(out_channels * 4, num_outputs, 1)), - ] - self.network = nn.Sequential(*self.network) - - for name, m in self.named_modules(): - if any(map(lambda x: isinstance(m, x), [nn.Linear, nn.Conv1d, nn.Conv2d])): - nn.init.kaiming_normal(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.constant(m.bias, 0) - - def _generate_stage(self, name, in_channels, middle_channels, out_channels, stride, num_blocks): - blocks = [BasicBlock(name + '_1', in_channels, middle_channels, out_channels, 2, False)] - for i in range(1, num_blocks): - blocks.append(BasicBlock(name + '_{}'.format(i + 1), - out_channels, middle_channels, out_channels, 1, True)) - return nn.Sequential(*blocks) - - def forward(self, x): - return self.network(x).view(x.size(0), -1) - - def generate_caffe_prototxt(self, caffe_net, layer): - return generate_caffe_prototxt(self.network, caffe_net, layer) - - def convert_to_caffe(self, name): - caffe_net = caffe.NetSpec() - layer = L.Input(shape=dict(dim=[1, 3, 224, 224])) - caffe_net.tops['data'] = layer - generate_caffe_prototxt(self, caffe_net, layer) - print(caffe_net.to_proto()) - with open(name + '.prototxt', 'wb') as f: - f.write(str(caffe_net.to_proto())) - caffe_net = caffe.Net(name + '.prototxt', caffe.TEST) - convert_pytorch_to_caffe(self, caffe_net) - caffe_net.save(name + '.caffemodel') - - - network = Network(1000, 8) - print(network) - network.convert_to_caffe('net') \ No newline at end of file