Initial commit: VGG16, VGG19, ResNet50

fchollet · Aug 3, 2016 · 13d7762 · 13d7762
commit 13d7762
Show file tree

Hide file tree

Showing 5 changed files with 701 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,78 @@
+# Trained image classification models for Keras
+
+This repository contains code and weights files for the following Keras models:
+
+- VGG16
+- VGG19
+- ResNet50
+
+We plan on adding Inception v3 soon.
+
+All architectures are compatible with both TensorFlow and Theano, and upon instantiation the models will be built according to the image dimension ordering set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_dim_ordering=tf`, then any model loaded from this repository will get built according to the TensorFlow dimension ordering convention, "Height-Width-Depth".
+
+Weights can be automatically loaded upon instantiation (`weights='imagenet'` argument in model constructor). Weights are automatically downloaded if necessary, and cached locally in `~/.keras/models/`.
+
+## Examples
+
+### Classify images
+
+```python
+import numpy as np
+from resnet50 import ResNet50
+from keras.preprocessing import image
+from imagenet_utils import preprocess_input, decode_predictions
+
+model = ResNet50(weights='imagenet')
+
+img_path = 'elephant.jpg'
+img = image.load_img(img_path, target_size=(224, 224))
+x = image.img_to_array(img)
+x = np.expand_dims(x, axis=0)
+x = preprocess_input(x)
+
+preds = model.predict(x)
+print('Predicted:', decode_predictions(preds))
+# print: [[u'n02504458', u'African_elephant']]
+```
+
+### Extract features from images
+
+```python
+import numpy as np
+from vgg16 import VGG16
+from keras.preprocessing import image
+from imagenet_utils import preprocess_input
+
+model = VGG16(weights='imagenet', include_top=False)
+
+img_path = 'elephant.jpg'
+img = image.load_img(img_path, target_size=(224, 224))
+x = image.img_to_array(img)
+x = np.expand_dims(x, axis=0)
+x = preprocess_input(x)
+
+features = model.predict(x)
+```
+
+### Extract features from an arbitrary intermediate layer
+
+```python
+import numpy as np
+from vgg19 import VGG19
+from keras.preprocessing import image
+from imagenet_utils import preprocess_input
+from keras.models import Model
+
+base_model = VGG19(weights='imagenet')
+model = Model(input=base_model.input, output=base_model.get_layer('block4_pool').output)
+
+img_path = 'elephant.jpg'
+img = image.load_img(img_path, target_size=(224, 224))
+x = image.img_to_array(img)
+x = np.expand_dims(x, axis=0)
+x = preprocess_input(x)
+
+block4_pool_features = model.predict(x)
+```
+
+## References
+
+- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
diff --git a/imagenet_utils.py b/imagenet_utils.py
@@ -0,0 +1,43 @@
+import numpy as np
+import json
+
+from keras.utils.data_utils import get_file
+from keras import backend as K
+
+# Cache for the parsed ImageNet class index. It stays `None` until
+# `decode_predictions` loads the JSON file on its first call.
+CLASS_INDEX = None
+# URL the class index JSON file is fetched from (via `get_file`).
+CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
+
+
+def preprocess_input(x, dim_ordering='default'):
+    '''Convert a batch of RGB images to zero-centered BGR images.
+
+    The channels are first reversed ('RGB'->'BGR') and the ImageNet
+    mean pixel is then subtracted from each channel.
+
+    # Arguments
+        x: 4D numpy array holding a batch of RGB images, laid out
+            according to `dim_ordering`. It should be a float array.
+            The mean subtraction is done in place, so the array that
+            is passed in is modified; pass a copy if the original
+            pixel values are still needed.
+        dim_ordering: 'tf' (channels on the last axis), 'th'
+            (channels on axis 1) or 'default', in which case the
+            value of `K.image_dim_ordering()` is used.
+
+    # Returns
+        The preprocessed batch, with the same shape as `x` and its
+        channels in BGR order. It is a view on the input array.
+
+    # Raises
+        ValueError: if `dim_ordering` is not 'default', 'tf' or 'th'.
+    '''
+    if dim_ordering == 'default':
+        dim_ordering = K.image_dim_ordering()
+    if dim_ordering not in {'tf', 'th'}:
+        raise ValueError('Invalid `dim_ordering`: expected "tf" or "th", '
+                         'got ' + str(dim_ordering))
+
+    # The mean values below are listed in B, G, R order, so the channels
+    # must be reversed *before* the subtraction. Subtracting first would
+    # remove the blue mean from the red channel and vice versa.
+    if dim_ordering == 'th':
+        # 'RGB'->'BGR'
+        x = x[:, ::-1, :, :]
+        x[:, 0, :, :] -= 103.939
+        x[:, 1, :, :] -= 116.779
+        x[:, 2, :, :] -= 123.68
+    else:
+        # 'RGB'->'BGR'
+        x = x[:, :, :, ::-1]
+        x[:, :, :, 0] -= 103.939
+        x[:, :, :, 1] -= 116.779
+        x[:, :, :, 2] -= 123.68
+    return x
+
+
+def decode_predictions(preds):
+    '''Look up the top-1 ImageNet class of every prediction in a batch.
+
+    The class index file is fetched with `get_file` on the first call
+    and kept in the module-level `CLASS_INDEX` for subsequent calls.
+
+    # Arguments
+        preds: 2D numpy array of shape (samples, 1000) holding one
+            score per ImageNet class for each sample.
+
+    # Returns
+        A list with one item per sample: the `CLASS_INDEX` entry of
+        the highest-scoring class.
+
+    # Raises
+        ValueError: if `preds` is not a 2D array with 1000 columns.
+    '''
+    global CLASS_INDEX
+    if len(preds.shape) != 2 or preds.shape[1] != 1000:
+        raise ValueError('`decode_predictions` expects a batch of '
+                         'predictions (i.e. a 2D array of shape '
+                         '(samples, 1000)). Found array with shape: ' +
+                         str(preds.shape))
+    if CLASS_INDEX is None:
+        fpath = get_file('imagenet_class_index.json',
+                         CLASS_INDEX_PATH,
+                         cache_subdir='models')
+        # Use a context manager so the file handle is closed right away
+        # instead of being left for the garbage collector.
+        with open(fpath) as f:
+            CLASS_INDEX = json.load(f)
+    # The JSON keys are strings, hence the str() around each index.
+    indices = np.argmax(preds, axis=-1)
+    return [CLASS_INDEX[str(i)] for i in indices]
diff --git a/resnet50.py b/resnet50.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+'''ResNet50 model for Keras.
+
+# Reference:
+
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
+
+Adapted from code contributed by BigMoyan.
+'''
+from __future__ import print_function
+
+import numpy as np
+import warnings
+
+from keras.layers import merge, Input
+from keras.layers import Dense, Activation, Flatten
+from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
+from keras.layers import BatchNormalization
+from keras.models import Model
+from keras.preprocessing import image
+import keras.backend as K
+from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils.data_utils import get_file
+from imagenet_utils import decode_predictions, preprocess_input
+
+
+# Download URLs of the pre-trained ImageNet weights used by `ResNet50`.
+# The `TH_*` files are used with the 'th' image dimension ordering and the
+# `TF_*` files otherwise; the `*_NO_TOP` files are used when
+# `include_top=False`.
+TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_th_dim_ordering_th_kernels.h5'
+TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
+TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
+TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block):
+    '''Residual block whose shortcut is the untouched input tensor.
+
+    The main path is conv 1x1 -> BN -> relu -> conv (kernel_size) -> BN
+    -> relu -> conv 1x1 -> BN. Its output is summed with `input_tensor`
+    and passed through a final relu.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: kernel size of the middle conv layer of the main path
+        filters: list of 3 integers, the nb_filters of the 3 conv layers
+            of the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+
+    # Returns
+        The output tensor of the block.
+    '''
+    filters_a, filters_b, filters_c = filters
+    # Batch normalization acts on the channel axis, whose position
+    # depends on the dimension ordering.
+    bn_axis = 3 if K.image_dim_ordering() == 'tf' else 1
+    label = str(stage) + block
+    conv_prefix = 'res' + label + '_branch'
+    bn_prefix = 'bn' + label + '_branch'
+
+    # First 1x1 convolution.
+    y = Convolution2D(filters_a, 1, 1,
+                      name=conv_prefix + '2a')(input_tensor)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2a')(y)
+    y = Activation('relu')(y)
+
+    # Middle convolution; 'same' padding keeps the spatial size.
+    y = Convolution2D(filters_b, kernel_size, kernel_size,
+                      border_mode='same',
+                      name=conv_prefix + '2b')(y)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2b')(y)
+    y = Activation('relu')(y)
+
+    # Last 1x1 convolution; no relu before the sum.
+    y = Convolution2D(filters_c, 1, 1,
+                      name=conv_prefix + '2c')(y)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2c')(y)
+
+    # Add the shortcut, then apply the final activation.
+    summed = merge([y, input_tensor], mode='sum')
+    return Activation('relu')(summed)
+
+
+def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
+    '''Residual block whose shortcut goes through a conv layer.
+
+    The main path is conv 1x1 -> BN -> relu -> conv (kernel_size) -> BN
+    -> relu -> conv 1x1 -> BN. The shortcut is conv 1x1 -> BN applied to
+    `input_tensor`. Both are summed and passed through a final relu.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: kernel size of the middle conv layer of the main path
+        filters: list of 3 integers, the nb_filters of the 3 conv layers
+            of the main path; the shortcut conv uses the last one
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+        strides: `subsample` value given to both the first conv layer of
+            the main path and the shortcut conv layer, defaults to (2, 2)
+
+    # Returns
+        The output tensor of the block.
+    '''
+    filters_a, filters_b, filters_c = filters
+    # Batch normalization acts on the channel axis, whose position
+    # depends on the dimension ordering.
+    bn_axis = 3 if K.image_dim_ordering() == 'tf' else 1
+    label = str(stage) + block
+    conv_prefix = 'res' + label + '_branch'
+    bn_prefix = 'bn' + label + '_branch'
+
+    # Main path, first 1x1 convolution (strided).
+    y = Convolution2D(filters_a, 1, 1,
+                      subsample=strides,
+                      name=conv_prefix + '2a')(input_tensor)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2a')(y)
+    y = Activation('relu')(y)
+
+    # Main path, middle convolution.
+    y = Convolution2D(filters_b, kernel_size, kernel_size,
+                      border_mode='same',
+                      name=conv_prefix + '2b')(y)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2b')(y)
+    y = Activation('relu')(y)
+
+    # Main path, last 1x1 convolution; no relu before the sum.
+    y = Convolution2D(filters_c, 1, 1,
+                      name=conv_prefix + '2c')(y)
+    y = BatchNormalization(axis=bn_axis, name=bn_prefix + '2c')(y)
+
+    # Shortcut path: same strides and output depth as the main path.
+    residual = Convolution2D(filters_c, 1, 1,
+                             subsample=strides,
+                             name=conv_prefix + '1')(input_tensor)
+    residual = BatchNormalization(axis=bn_axis,
+                                  name=bn_prefix + '1')(residual)
+
+    summed = merge([y, residual], mode='sum')
+    return Activation('relu')(summed)
+
+
+def ResNet50(include_top=True, weights='imagenet',
+             input_tensor=None):
+    '''Instantiate the ResNet50 architecture,
+    optionally loading weights pre-trained
+    on ImageNet. Note that when using TensorFlow,
+    for best performance you should set
+    `image_dim_ordering="tf"` in your Keras config
+    at ~/.keras/keras.json.
+
+    The model and the weights are compatible with both
+    TensorFlow and Theano. The dimension ordering
+    convention used by the model is the one
+    specified in your Keras config file.
+
+    # Arguments
+        include_top: whether to include the fully-connected
+            classification layer (`fc1000`) at the top of the network.
+            When no `input_tensor` is given, the input size is fixed to
+            224x224 if this is True and left unspecified otherwise.
+        weights: one of `None` (random initialization)
+            or "imagenet" (pre-training on ImageNet).
+        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+
+    # Returns
+        A Keras model instance.
+
+    # Raises
+        ValueError: if `weights` is neither `None` nor "imagenet".
+    '''
+    if weights not in {'imagenet', None}:
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization) or `imagenet` '
+                         '(pre-training on ImageNet).')
+    dim_ordering = K.image_dim_ordering()
+
+    # Determine proper input shape: fixed spatial size with the
+    # classification layer, free spatial size without it.
+    if dim_ordering == 'th':
+        if include_top:
+            input_shape = (3, 224, 224)
+        else:
+            input_shape = (3, None, None)
+    else:
+        if include_top:
+            input_shape = (224, 224, 3)
+        else:
+            input_shape = (None, None, 3)
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        # Wrap backend tensors in an Input layer; Keras tensors are
+        # used as they are.
+        if not K.is_keras_tensor(input_tensor):
+            img_input = Input(tensor=input_tensor)
+        else:
+            img_input = input_tensor
+    # Batch normalization acts on the channel axis.
+    if dim_ordering == 'tf':
+        bn_axis = 3
+    else:
+        bn_axis = 1
+
+    # Stage 1: stem.
+    x = ZeroPadding2D((3, 3))(img_input)
+    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
+    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
+    x = Activation('relu')(x)
+    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+    # Stage 2 keeps the resolution (strides of 1); every later stage
+    # starts with a conv_block using the default (2, 2) strides.
+    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+
+    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
+
+    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+
+    x = AveragePooling2D((7, 7), name='avg_pool')(x)
+
+    if include_top:
+        x = Flatten()(x)
+        x = Dense(1000, activation='softmax', name='fc1000')(x)
+
+    model = Model(img_input, x)
+
+    # load weights
+    if weights == 'imagenet':
+        # Pick the weights file matching the dimension ordering and the
+        # presence of the classification layer.
+        if dim_ordering == 'th':
+            if include_top:
+                weights_name = 'resnet50_weights_th_dim_ordering_th_kernels.h5'
+                weights_url = TH_WEIGHTS_PATH
+            else:
+                weights_name = 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
+                weights_url = TH_WEIGHTS_PATH_NO_TOP
+        else:
+            if include_top:
+                weights_name = 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
+                weights_url = TF_WEIGHTS_PATH
+            else:
+                weights_name = 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
+                weights_url = TF_WEIGHTS_PATH_NO_TOP
+        weights_path = get_file(weights_name,
+                                weights_url,
+                                cache_subdir='models')
+        model.load_weights(weights_path)
+
+        # The 'th' files hold Theano kernels and the 'tf' files hold
+        # TensorFlow kernels, so the kernels are converted whenever the
+        # backend in use is the other one.
+        if dim_ordering == 'th':
+            if K.backend() == 'tensorflow':
+                warnings.warn('You are using the TensorFlow backend, yet you '
+                              'are using the Theano '
+                              'image dimension ordering convention '
+                              '(`image_dim_ordering="th"`). '
+                              'For best performance, set '
+                              '`image_dim_ordering="tf"` in '
+                              'your Keras config '
+                              'at ~/.keras/keras.json.')
+                convert_all_kernels_in_model(model)
+        else:
+            if K.backend() == 'theano':
+                convert_all_kernels_in_model(model)
+    return model
+
+
+if __name__ == '__main__':
+    # Demo: classify the local file 'elephant.jpg' with the
+    # ImageNet-trained model and print the predicted class.
+    model = ResNet50(include_top=True, weights='imagenet')
+
+    # Load the image at the size used by the model with its top
+    # layer, then add the batch axis before preprocessing.
+    img = image.load_img('elephant.jpg', target_size=(224, 224))
+    batch = np.expand_dims(image.img_to_array(img), axis=0)
+    x = preprocess_input(batch)
+    print('Input image shape:', x.shape)
+
+    print('Predicted:', decode_predictions(model.predict(x)))