Port visualizing what convnets learn (#640)

* initial-file from keras io
* port to keras core
Showing 1 changed file with 209 additions and 0 deletions.
examples/keras_io/tensorflow/vision/visualizing_what_convnets_learn.py
""" | ||
Title: Visualizing what convnets learn | ||
Author: [fchollet](https://twitter.com/fchollet) | ||
Date created: 2020/05/29 | ||
Last modified: 2020/05/29 | ||
Description: Displaying the visual patterns that convnet filters respond to. | ||
Accelerator: GPU | ||
""" | ||
""" | ||
## Introduction | ||
In this example, we look into what sort of visual patterns image classification models | ||
learn. We'll be using the `ResNet50V2` model, trained on the ImageNet dataset. | ||
Our process is simple: we will create input images that maximize the activation of | ||
specific filters in a target layer (picked somewhere in the middle of the model: layer | ||
`conv3_block4_out`). Such images represent a visualization of the | ||
pattern that the filter responds to. | ||
""" | ||
|
||
""" | ||
## Setup | ||
""" | ||
|
||
import os | ||
|
||
os.environ["KERAS_BACKEND"] = "tensorflow" | ||
|
||
import keras_core as keras | ||
|
||
|
||
import numpy as np | ||
import tensorflow as tf | ||
|
||
# The dimensions of our input image
img_width = 180
img_height = 180
# Our target layer: we will visualize the filters from this layer.
# See `model.summary()` for the list of layer names, if you want to change this.
layer_name = "conv3_block4_out"
""" | ||
## Build a feature extraction model | ||
""" | ||
|
||
# Build a ResNet50V2 model loaded with pre-trained ImageNet weights | ||
model = keras.applications.ResNet50V2(weights="imagenet", include_top=False) | ||
|
||
# Set up a model that returns the activation values for our target layer | ||
layer = model.get_layer(name=layer_name) | ||
feature_extractor = keras.Model(inputs=model.inputs, outputs=layer.output) | ||
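
"""
If you would like to target a different layer, the block outputs of `ResNet50V2`
(layers whose names end in `_out`) are natural candidates. One quick way to
list them (a minimal sketch):
"""

# Print every block-output layer name in the loaded model; any of these names
# can be substituted for `layer_name` above.
block_output_names = [l.name for l in model.layers if l.name.endswith("_out")]
print(block_output_names)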

"""
## Set up the gradient ascent process

The "loss" we will maximize is simply the mean of the activation of a specific filter in
our target layer. To avoid border effects, we exclude border pixels.
"""


def compute_loss(input_image, filter_index):
    activation = feature_extractor(input_image)
    # We avoid border artifacts by only involving non-border pixels in the loss.
    filter_activation = activation[:, 2:-2, 2:-2, filter_index]
    return tf.reduce_mean(filter_activation)
""" | ||
Our gradient ascent function simply computes the gradients of the loss above | ||
with regard to the input image, and update the update image so as to move it | ||
towards a state that will activate the target filter more strongly. | ||
""" | ||
|
||
|
||
@tf.function | ||
def gradient_ascent_step(img, filter_index, learning_rate): | ||
with tf.GradientTape() as tape: | ||
tape.watch(img) | ||
loss = compute_loss(img, filter_index) | ||
# Compute gradients. | ||
grads = tape.gradient(loss, img) | ||
# Normalize gradients. | ||
grads = tf.math.l2_normalize(grads) | ||
img += learning_rate * grads | ||
return loss, img | ||
|
||
|
||
""" | ||
## Set up the end-to-end filter visualization loop | ||
Our process is as follow: | ||
- Start from a random image that is close to "all gray" (i.e. visually netural) | ||
- Repeatedly apply the gradient ascent step function defined above | ||
- Convert the resulting input image back to a displayable form, by normalizing it, | ||
center-cropping it, and restricting it to the [0, 255] range. | ||
""" | ||
|
||
|
||
def initialize_image():
    # We start from a gray image with some random noise
    img = tf.random.uniform((1, img_width, img_height, 3))
    # ResNet50V2 expects inputs in the range [-1, +1].
    # Here we scale our random inputs to [-0.125, +0.125]
    return (img - 0.5) * 0.25
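
# Quick sanity check of the scaling above (a minimal sketch): uniform noise in
# [0, 1) shifted by -0.5 and scaled by 0.25 should land in roughly [-0.125, +0.125).
sample = initialize_image()
print(float(tf.reduce_min(sample)), float(tf.reduce_max(sample)))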


def visualize_filter(filter_index):
    # We run gradient ascent for 30 steps
    iterations = 30
    learning_rate = 10.0
    img = initialize_image()
    for iteration in range(iterations):
        loss, img = gradient_ascent_step(img, filter_index, learning_rate)

    # Decode the resulting input image
    img = deprocess_image(img[0].numpy())
    return loss, img

def deprocess_image(img):
    # Normalize array: center on 0., ensure standard deviation is 0.15
    img -= img.mean()
    img /= img.std() + 1e-5
    img *= 0.15

    # Center crop
    img = img[25:-25, 25:-25, :]

    # Clip to [0, 1]
    img += 0.5
    img = np.clip(img, 0, 1)

    # Convert to RGB array
    img *= 255
    img = np.clip(img, 0, 255).astype("uint8")
    return img
""" | ||
Let's try it out with filter 0 in the target layer: | ||
""" | ||
|
||
from IPython.display import Image, display | ||
|
||
loss, img = visualize_filter(0) | ||
keras.utils.save_img("0.png", img) | ||
|
||
""" | ||
This is what an input that maximizes the response of filter 0 in the target layer would | ||
look like: | ||
""" | ||
|
||
display(Image("0.png")) | ||
|
||
""" | ||
## Visualize the first 64 filters in the target layer | ||
Now, let's make a 8x8 grid of the first 64 filters | ||
in the target layer to get of feel for the range | ||
of different visual patterns that the model has learned. | ||
""" | ||
|
||
# Compute image inputs that maximize per-filter activations | ||
# for the first 64 filters of our target layer | ||
all_imgs = [] | ||
for filter_index in range(64): | ||
print("Processing filter %d" % (filter_index,)) | ||
loss, img = visualize_filter(filter_index) | ||
all_imgs.append(img) | ||
|
||

# Build a black picture with enough space for
# our 8 x 8 filters of size 130 x 130, with a 5px margin in between
margin = 5
n = 8
cropped_width = img_width - 25 * 2
cropped_height = img_height - 25 * 2
width = n * cropped_width + (n - 1) * margin
height = n * cropped_height + (n - 1) * margin
stitched_filters = np.zeros((width, height, 3))

# Fill the picture with our saved filters
for i in range(n):
    for j in range(n):
        img = all_imgs[i * n + j]
        stitched_filters[
            (cropped_width + margin) * i : (cropped_width + margin) * i + cropped_width,
            (cropped_height + margin) * j : (cropped_height + margin) * j
            + cropped_height,
            :,
        ] = img
keras.utils.save_img("stitched_filters.png", stitched_filters)

from IPython.display import Image, display

display(Image("stitched_filters.png"))

"""
Image classification models see the world by decomposing their inputs over a "vector
basis" of texture filters such as these.

See also
[this old blog post](https://blog.keras.io/how-convolutional-neural-networks-see-the-world.html)
for analysis and interpretation.

Example available on HuggingFace.

[![Generic badge](https://img.shields.io/badge/🤗%20Spaces-What%20Convnets%20Learn-black.svg)](https://huggingface.co/spaces/keras-io/what-convnets-learn)
"""