-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit: VGG16, VGG19, ResNet50
- Loading branch information
0 parents
commit 13d7762
Showing
5 changed files
with
701 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# Trained image classification models for Keras | ||
|
||
This repository contains code and weights files for the following Keras models: | ||
|
||
- VGG16 | ||
- VGG19 | ||
- ResNet50 | ||
|
||
We plan on adding Inception v3 soon. | ||
|
||
All architectures are compatible with both TensorFlow and Theano, and upon instantiation the models will be built according to the image dimension ordering set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_dim_ordering=tf`, then any model loaded from this repository will get built according to the TensorFlow dimension ordering convention, "Height-Width-Depth". | ||
|
||
Weights can be automatically loaded upon instantiation (`weights='imagenet'` argument in model constructor). Weights are automatically downloaded if necessary, and cached locally in `~/.keras/models/`. | ||
|
||
## Examples | ||
|
||
### Classify images | ||
|
||
```python | ||
import numpy as np  # needed for np.expand_dims below
from resnet50 import ResNet50
from keras.preprocessing import image
from imagenet_utils import preprocess_input, decode_predictions

model = ResNet50(weights='imagenet')

# Load the image at the 224x224 resolution the classifier expects and
# turn it into a batch containing a single sample.
img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

preds = model.predict(x)
print('Predicted:', decode_predictions(preds))
# print: [[u'n02504458', u'African_elephant']]
``` | ||
|
||
### Extract features from images | ||
|
||
```python | ||
import numpy as np  # needed for np.expand_dims below
from vgg16 import VGG16
from keras.preprocessing import image
from imagenet_utils import preprocess_input

# include_top=False drops the classifier so the model outputs features.
model = VGG16(weights='imagenet', include_top=False)

img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

features = model.predict(x)
``` | ||
|
||
### Extract features from an arbitrary intermediate layer | ||
|
||
```python | ||
import numpy as np  # needed for np.expand_dims below
from vgg19 import VGG19
from keras.preprocessing import image
from imagenet_utils import preprocess_input
from keras.models import Model

base_model = VGG19(weights='imagenet')
# Build a second model that shares the base model's input but stops at
# the layer whose activations we want to read out.
model = Model(input=base_model.input, output=base_model.get_layer('block4_pool').output)

img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

block4_pool_features = model.predict(x)
``` | ||
|
||
## References | ||
|
||
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) | ||
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import numpy as np | ||
import json | ||
|
||
from keras.utils.data_utils import get_file | ||
from keras import backend as K | ||
|
||
# Lazily populated cache of the ImageNet class index; filled in by
# `decode_predictions` the first time it is called.
CLASS_INDEX = None
# Location of the JSON file the class index is downloaded from.
CLASS_INDEX_PATH = (
    'https://s3.amazonaws.com/deep-learning-models/'
    'image-models/imagenet_class_index.json'
)
|
||
|
||
def preprocess_input(x, dim_ordering='default'):
    '''Convert a batch of RGB images to zero-centered BGR.

    The channel axis is first reversed ('RGB' -> 'BGR') and only then are
    the per-channel constants 103.939, 116.779 and 123.68 subtracted from
    channels 0, 1 and 2 of the reordered array, i.e. from B, G and R
    respectively. Subtracting before the flip would apply the first
    constant to R and the last one to B.

    # Arguments
        x: 4D numpy array holding a batch of RGB images, indexed as
            (sample, channel, row, col) for 'th' or
            (sample, row, col, channel) for 'tf'. It is left untouched.
        dim_ordering: 'tf', 'th', or 'default' to use the value returned
            by `K.image_dim_ordering()`.

    # Returns
        A new floating-point array with the same shape as `x`.
        Non-float inputs are converted to float32.

    # Raises
        ValueError: if `dim_ordering` is neither 'tf' nor 'th' after
            resolving 'default'.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    # Explicit check instead of `assert`, which is stripped under `-O`.
    if dim_ordering not in {'tf', 'th'}:
        raise ValueError('`dim_ordering` should be "tf" or "th", got: ' +
                         repr(dim_ordering))

    # Work on a private copy so the caller's array is never modified, and
    # make sure in-place float subtraction is valid for integer inputs.
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype('float32')

    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        x = x[:, ::-1, :, :]
        x[:, 0, :, :] -= 103.939
        x[:, 1, :, :] -= 116.779
        x[:, 2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        x = x[:, :, :, ::-1]
        x[:, :, :, 0] -= 103.939
        x[:, :, :, 1] -= 116.779
        x[:, :, :, 2] -= 123.68
    return x
|
||
|
||
def decode_predictions(preds):
    '''Map a batch of 1000-way predictions to their top class entries.

    The class index JSON is fetched with `get_file` on first use and
    cached in the module-level `CLASS_INDEX`, so later calls do not read
    the file again.

    # Arguments
        preds: 2D numpy array of shape (samples, 1000).

    # Returns
        A list with one entry per sample: the value stored in the class
        index under the sample's highest-scoring class number.

    # Raises
        ValueError: if `preds` is not of shape (samples, 1000).
    '''
    global CLASS_INDEX
    # Explicit check instead of `assert`, which is stripped under `-O`.
    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError('`decode_predictions` expects a batch of '
                         'predictions (i.e. a 2D array of shape '
                         '(samples, 1000)). Found array with shape: ' +
                         str(preds.shape))
    if CLASS_INDEX is None:
        fpath = get_file('imagenet_class_index.json',
                         CLASS_INDEX_PATH,
                         cache_subdir='models')
        # Close the file deterministically instead of leaking the handle.
        with open(fpath) as f:
            CLASS_INDEX = json.load(f)
    # The JSON object is keyed by the class number as a string.
    return [CLASS_INDEX[str(i)] for i in np.argmax(preds, axis=-1)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,247 @@ | ||
# -*- coding: utf-8 -*- | ||
'''ResNet50 model for Keras. | ||
# Reference: | ||
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) | ||
Adapted from code contributed by BigMoyan. | ||
''' | ||
from __future__ import print_function | ||
|
||
import numpy as np | ||
import warnings | ||
|
||
from keras.layers import merge, Input | ||
from keras.layers import Dense, Activation, Flatten | ||
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D | ||
from keras.layers import BatchNormalization | ||
from keras.models import Model | ||
from keras.preprocessing import image | ||
import keras.backend as K | ||
from keras.utils.layer_utils import convert_all_kernels_in_model | ||
from keras.utils.data_utils import get_file | ||
from imagenet_utils import decode_predictions, preprocess_input | ||
|
||
|
||
# Download locations of the pre-trained ResNet50 weight files, one per
# dimension ordering ('th' / 'tf'), with and without the top classifier.
TH_WEIGHTS_PATH = (
    'https://github.com/fchollet/deep-learning-models/releases/download/'
    'v0.1/resnet50_weights_th_dim_ordering_th_kernels.h5'
)
TF_WEIGHTS_PATH = (
    'https://github.com/fchollet/deep-learning-models/releases/download/'
    'v0.1/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
)
TH_WEIGHTS_PATH_NO_TOP = (
    'https://github.com/fchollet/deep-learning-models/releases/download/'
    'v0.1/resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
)
TF_WEIGHTS_PATH_NO_TOP = (
    'https://github.com/fchollet/deep-learning-models/releases/download/'
    'v0.1/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
)
|
||
|
||
def identity_block(input_tensor, kernel_size, filters, stage, block):
    '''Residual block whose shortcut is the input tensor itself.

    The main path is a 1x1 conv, a kernel_size x kernel_size conv and a
    second 1x1 conv, each followed by batch normalization. Its output is
    summed with `input_tensor` and passed through a ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: sequence of 3 integers, the nb_filter of the three conv
            layers on the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        The output tensor of the block.
    '''
    width_in, width_mid, width_out = filters
    # Batch normalization acts on the channel axis, whose position
    # depends on the configured dimension ordering.
    channel_axis = 3 if K.image_dim_ordering() == 'tf' else 1
    tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + tag
    bn_prefix = 'bn' + tag

    # Main path, first 1x1 convolution.
    y = Convolution2D(width_in, 1, 1, name=conv_prefix + '2a')(input_tensor)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(y)
    y = Activation('relu')(y)

    # Main path, middle convolution ('same' padding keeps the spatial size).
    y = Convolution2D(width_mid, kernel_size, kernel_size,
                      border_mode='same', name=conv_prefix + '2b')(y)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(y)
    y = Activation('relu')(y)

    # Main path, last 1x1 convolution (no activation before the merge).
    y = Convolution2D(width_out, 1, 1, name=conv_prefix + '2c')(y)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(y)

    # Add the untouched input as the shortcut, then apply the final ReLU.
    y = merge([y, input_tensor], mode='sum')
    return Activation('relu')(y)
|
||
|
||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    '''Residual block whose shortcut goes through a conv layer.

    The main path is a strided 1x1 conv, a kernel_size x kernel_size conv
    and a second 1x1 conv, each followed by batch normalization. The
    shortcut is a strided 1x1 conv plus batch normalization applied to
    `input_tensor`. Both paths are summed and passed through a ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: sequence of 3 integers, the nb_filter of the three conv
            layers on the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: subsample applied by the first conv layer of the main
            path and by the shortcut conv layer

    # Returns
        The output tensor of the block.

    Note that from stage 3, the first conv layer at main path is with
    subsample=(2,2), and the shortcut should have subsample=(2,2) as well.
    '''
    width_in, width_mid, width_out = filters
    # Batch normalization acts on the channel axis, whose position
    # depends on the configured dimension ordering.
    channel_axis = 3 if K.image_dim_ordering() == 'tf' else 1
    tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + tag
    bn_prefix = 'bn' + tag

    # Main path, first 1x1 convolution; carries the block's stride.
    y = Convolution2D(width_in, 1, 1, subsample=strides,
                      name=conv_prefix + '2a')(input_tensor)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(y)
    y = Activation('relu')(y)

    # Main path, middle convolution ('same' padding keeps the spatial size).
    y = Convolution2D(width_mid, kernel_size, kernel_size, border_mode='same',
                      name=conv_prefix + '2b')(y)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(y)
    y = Activation('relu')(y)

    # Main path, last 1x1 convolution (no activation before the merge).
    y = Convolution2D(width_out, 1, 1, name=conv_prefix + '2c')(y)
    y = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(y)

    # Shortcut path: same stride and output width as the main path so the
    # two tensors can be summed.
    residual = Convolution2D(width_out, 1, 1, subsample=strides,
                             name=conv_prefix + '1')(input_tensor)
    residual = BatchNormalization(axis=channel_axis, name=bn_prefix + '1')(residual)

    y = merge([y, residual], mode='sum')
    return Activation('relu')(y)
|
||
|
||
def ResNet50(include_top=True, weights='imagenet',
             input_tensor=None):
    '''Instantiate the ResNet50 architecture,
    optionally loading weights pre-trained
    on ImageNet. Note that when using TensorFlow,
    for best performance you should set
    `image_dim_ordering="tf"` in your Keras config
    at ~/.keras/keras.json.

    The model and the weights are compatible with both
    TensorFlow and Theano. The dimension ordering
    convention used by the model is the one
    specified in your Keras config file.

    # Arguments
        include_top: whether to include the flatten + 1000-way softmax
            layer (`fc1000`) at the top of the network. When False,
            the spatial input dimensions are left unspecified and the
            model ends at the `avg_pool` layer.
        weights: one of `None` (random initialization)
            or "imagenet" (pre-training on ImageNet).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is neither `None` nor "imagenet".
    '''
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    # Query the backend configuration once; it is used for the input
    # shape, the batch-normalization axis and the weight file selection.
    dim_ordering = K.image_dim_ordering()

    # Determine proper input shape
    spatial_dims = (224, 224) if include_top else (None, None)
    if dim_ordering == 'th':
        input_shape = (3,) + spatial_dims
    else:
        input_shape = spatial_dims + (3,)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        # Wrap a raw backend tensor so it can be used as a model input.
        img_input = Input(tensor=input_tensor)
    else:
        img_input = input_tensor

    bn_axis = 3 if dim_ordering == 'tf' else 1

    # Stem: 7x7 strided convolution followed by max pooling.
    x = ZeroPadding2D((3, 3))(img_input)
    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2 keeps the resolution (strides=(1, 1)); stages 3-5 use the
    # default stride of conv_block.
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    for label in 'bc':
        x = identity_block(x, 3, [64, 64, 256], stage=2, block=label)

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for label in 'bcd':
        x = identity_block(x, 3, [128, 128, 512], stage=3, block=label)

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for label in 'bcdef':
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block=label)

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    for label in 'bc':
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block=label)

    x = AveragePooling2D((7, 7), name='avg_pool')(x)

    if include_top:
        x = Flatten()(x)
        x = Dense(1000, activation='softmax', name='fc1000')(x)

    model = Model(img_input, x)

    # load weights
    if weights == 'imagenet':
        if dim_ordering == 'th':
            if include_top:
                fname = 'resnet50_weights_th_dim_ordering_th_kernels.h5'
                origin = TH_WEIGHTS_PATH
            else:
                fname = 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
                origin = TH_WEIGHTS_PATH_NO_TOP
            # The weight file matches the dimension ordering; kernels are
            # converted only when the other backend is in use.
            mismatched_backend = 'tensorflow'
        else:
            if include_top:
                fname = 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
                origin = TF_WEIGHTS_PATH
            else:
                fname = 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
                origin = TF_WEIGHTS_PATH_NO_TOP
            mismatched_backend = 'theano'

        weights_path = get_file(fname, origin, cache_subdir='models')
        model.load_weights(weights_path)

        if K.backend() == mismatched_backend:
            if dim_ordering == 'th':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image dimension ordering convention '
                              '(`image_dim_ordering="th"`). '
                              'For best performance, set '
                              '`image_dim_ordering="tf"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')
            convert_all_kernels_in_model(model)
    return model
|
||
|
||
if __name__ == '__main__':
    # Smoke test: classify a local image with the ImageNet weights.
    resnet = ResNet50(include_top=True, weights='imagenet')

    # Load the image at the 224x224 size used for the full model and
    # turn it into a preprocessed batch of one sample.
    pil_image = image.load_img('elephant.jpg', target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)
    batch = preprocess_input(batch)
    print('Input image shape:', batch.shape)

    scores = resnet.predict(batch)
    print('Predicted:', decode_predictions(scores))
Oops, something went wrong.