In [93]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras.datasets import fashion_mnist, cifar10
from keras.layers import Dense, Flatten, Normalization, Dropout, Conv2D, MaxPooling2D, RandomFlip, RandomRotation, RandomZoom, BatchNormalization, Activation, InputLayer
from keras.models import Sequential
from keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras import utils
import os
from keras.preprocessing.image import ImageDataGenerator

import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime

In [None]:
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

# Transfer Learning

### Feature Extraction and Classification

One of the key concepts needed with transfer learning is the separating of the feature extraction from the convolutional layers and the classification done in the fully connected layers.
<ul>
<li> The convolutional layer finds features in the image. I.e. the output of the end of the convolutional layers is a set of image-y features. 
<li> The fully connected layers take those features and classify the thing. 
</ul>

The idea behind this is that we allow someone (like Google) to train their fancy network on a bunch of fast computers, using millions and millions of images. These classifiers get very good at extracting features from objects. 

When using these models we take those convolutional layers and slap on our own classifier at the end, so the pretrained convolutional layers extract a bunch of features with their massive amount of training, then we use those features to predict our data!

### Tensorboard Up-Front

we'll also launch the tensorboard prior to doing any training. Pay attention to the log locations in each callback, we can nest the logs in folders, then use the names and tensorboard's regex search to monitor each run as it progresses. 

In [94]:
epochs = 1
%load_ext tensorboard
%tensorboard --logdir=logs

### Download Model

There are several models that are pretrained and available to us to use. VGG16 is one developed to do image recognition, the name stands for "Visual Geometry Group" - a group of researchers at the University of Oxford who developed it, and ‘16’ implies that this architecture has 16 layers. The model got ~93% on the ImageNet test that we mentioned a couple of weeks ago. 

![VGG16](images/vgg16.png "VGG16" )

#### Slide Convolutional Layers from Classifier

When downloading the model we specifiy that we don't want the top - that's the classification part. When we remove the top we also allow the model to adapt to the shape of our images, so we specify the input size as well.

In [95]:
from keras.applications.vgg16 import VGG16
from keras.layers import Input
from keras.models import Model
from keras.applications.vgg16 import preprocess_input

### Preprocessing Data

Our VGG 16 model comes with a preprocessing function to prepare the data in a way it is happy with. For this model the color encoding that it was trained on is different, so we should prepare the data properly to get good results. 

In [96]:
import pathlib
import PIL 
from keras.applications.vgg16 import preprocess_input

dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file(origin=dataset_url,
                                   fname='flower_photos',
                                   untar=True)
data_dir = pathlib.Path(data_dir)

#Flowers
batch_size = 32
img_height = 180
img_width = 180

train_ds_orig = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

val_ds_orig = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

class_names = train_ds_orig.class_names
print(class_names)

def preprocess(images, labels):
  return tf.keras.applications.vgg16.preprocess_input(images), labels

train_ds = train_ds_orig.map(preprocess)
val_ds = val_ds_orig.map(preprocess)


Found 3670 files belonging to 5 classes.
Using 2936 files for training.
Found 3670 files belonging to 5 classes.
Using 734 files for validation.
['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


#### Write Some Images for Tensorboard

We'll record some images, both pre and post processing. The VGG model wants images to use a different representation than RGB. 

In [97]:
file_writer = tf.summary.create_file_writer("logs/VGG/train_data")
train_ds_iterator = train_ds_orig.as_numpy_iterator()
train_proc_iterator = train_ds.as_numpy_iterator()
samp_batch  = train_ds_iterator.next()
proc_batch = train_proc_iterator.next()

with file_writer.as_default():
    images = np.reshape(samp_batch[0].astype("uint8"), (-1, 180, 180, 3))
    procImages = np.reshape(proc_batch[0].astype("uint8"), (-1, 180, 180, 3))
    #images = samp_batch
    tf.summary.image("32 Original Images", images, max_outputs=32, step=0)
    tf.summary.image("32 Processed Images", procImages, max_outputs=32, step=0)

#### Add on New Classifier

If we look at the previous summary of the model we can see that the last layer we have is a MaxPool layer. When making our own CNN this is the last layer before we add in the "normal" stuff for making predictions, this is the same. We need to flatten the data, then use dense layers and an output layer to classify the predictions. 

We end up with the pretrained parts finding features in images, and the custom part classifying images based on those features. If we think back to the concept of a convolutional network, the convolutional layers do the true heavy lifting in allowing us to do things like classify images, they take in the raw images and transform it into a set of features contained in that image. This ability to turn images into predictive features is the key - important parts of images like edges, corners, contrast, etc... are generic, and our borrowed model is excellent at finding these features in images. Our predicitons are unique, so we tweak the training of our model to make predictions for our data, into our classes - all based on the features that the borrowed model found! 

### Make Model

We take the model without the top, set the input image size, and then add our own classifier. Loading the model is simple, there are just a few things to specify:
<ul>
<li> weights="imagenet" - tells the model to use the weights from its imagenet training. This is what brings the "smarts", so we want it. 
<li> include_top=False - tells the model to not bring over the classifier bits that we wnat to replace. 
<li> input_shape - the model is trained on specific data sizes (224x224x3). We can repurpose it by changing the input size. 
</ul>

We also set the VGG model that we download to be not trainable. We don't want to overwrite all of the training that already exists, coming from the original training. What we want to be trained are the final dense parts we added on to classify our specific scenario. All the weights in the convolutional layers are kept the same, as they have been developed through large amounts of training; the weights in the fully connected layers will be trained, resulting in a model that combines the "sight" of the pretrained model with the context of what we are trying to classify. The VGG bits will just show as though they are one layer in our model, and for training purposes that makes sense. We can also see in the "trainable params" listing in the summary, the large number of weights in that VGG section we are borrowing are not trainable - that's the smart part of the model. 

<b>Note:</b> I think the "top" label is a bit misleading, as it isn't really the top, it is the part at the end that shows at the bottom of a summary. 

In [98]:
## Loading VGG16 model
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(180,180,3))
base_model.trainable = False ## Not trainable weights

# Add Dense Stuff
flatten_layer = Flatten()
dense_layer_1 = Dense(512, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
drop_layer_1 = Dropout(.2)
dense_layer_2 = Dense(256, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
prediction_layer = Dense(5)

model = Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    drop_layer_1,
    dense_layer_2,
    prediction_layer
])

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 5, 5, 512)         14714688  
                                                                 
 flatten_5 (Flatten)         (None, 12800)             0         
                                                                 
 dense_19 (Dense)            (None, 512)               6554112   
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_20 (Dense)            (None, 256)               131328    
                                                                 
 dense_21 (Dense)            (None, 5)                 1285      
                                                                 
Total params: 21,401,413
Trainable params: 6,686,725
N

#### Compile and Train

Once the new Frakenstein model is built we finish the training process as we normally would. The only difference is that here the weights of the VGG part of the model are not being adjusted during the backpropagation steps, only the weights in the layers that we added at the end are. For many, if not most, applications, this approach of adapting a pretrained model will give the best real world results. Unless you happen to live in a data centre, you probably lack both the data and the processing capacity to train any model from scratch to be as good as those that we can download. 

In [109]:
# Model
model.compile(
    optimizer=tf.keras.optimizers.Adam(),  
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    run_eagerly=False
)

time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/VGG/initial/" + time_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=True)
stopping_callback = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True, mode="max")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath="weights/VGG/initial/"+time_stamp+"model.hdf5", save_best_only=True, monitor='val_accuracy', mode='max')

model.fit(train_ds,
            epochs=epochs,
            verbose=1,
            validation_data=val_ds,
            callbacks=[tensorboard_callback, stopping_callback, checkpoint_callback])



<keras.callbacks.History at 0x7f84c9c442e0>

### Fine Tune Models

Lastly, we can adapt the entire model to our data. We'll unfreeze the original model, and then train the model again. The key addition here is that we set the learning rate to be extremely low (here it is 2 orders of magnitude smaller than the default) so the model doesn't totally rewrite all of the weights while training, rather it will only change a little bit - fine tuning its predictions to the actual data! Here the oringal convolutional layers are trainable, and the weights will be adjusted during training, but we dial the learning rate way down so that our changes only impact the model a little bit. This is a greater degree of fine tuning than we get when we lock the VGG layers, but it is still mainly relying on the previous training of the VGG model.

The end result is a model that can take advantage of all of the training that the original model received before we downloaded it. That ability of extracting features from images is then reapplied to our data for making predictions based on the features identified in the original model. Finally we take the entire model and just gently train it to be a little more suited to our data. The best of all worlds!

In [41]:
#Save a copy of the above model for next test. 
copy_model = model

base_model.trainable = True
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),  # Low learning rate
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=keras.metrics.SparseCategoricalAccuracy(name="accuracy")
)

time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/VGG/fine_tune/" + time_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=True)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath="weights/VGG/fine_tune/"+time_stamp+"model.hdf5", save_best_only=True, monitor='val_accuracy', mode='max')

model.fit(train_ds, epochs=epochs, validation_data=val_ds, verbose=1, callbacks=[tensorboard_callback, stopping_callback, checkpoint_callback])

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 5, 5, 512)         14714688  
                                                                 
 flatten_3 (Flatten)         (None, 12800)             0         
                                                                 
 dense_12 (Dense)            (None, 512)               6554112   
                                                                 
 dense_13 (Dense)            (None, 256)               131328    
                                                                 
 dense_15 (Dense)            (None, 5)                 1285      
                                                                 
Total params: 21,401,413
Trainable params: 21,401,413
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.History at 0x7f84a01ac100>

### Transfer + Fine Tuning Results

Yay, that's probably pretty accurate! In initial testing with 1 epoch, I got results around 80% before the fine tuning, and over 85% after the fine tuning. That's with 1 epoch! Other runs where we allow it to tune more trend to be even better - allowing 5 epochs of training + 5 epochs of fine tuning, my validation accuracy was around 90% and the training accuracy was nearing 100% - we could likely do even better with more aggressive regularization. 

This will likely be a great approach for something like image recognition!

### Where is the Model Looking?

Note - this uses some external stuff and an old version of scipy. 

In [132]:
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, 180, 180, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 186, 186, 3)  0           ['input_9[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 90, 90, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 90, 90, 64)   256         ['conv1_conv[0][0]']      

# Visualize

In [133]:
if IN_COLAB: 
    CATEGORICAL_INDEX = 1
    SEED_INPUT = 123
    SEED_INPUT_IMAGE = images[0]
    #import numpy as np
    #from matplotlib import pyplot as plt
    !ip install tf-keras-vis tensorflow
    from matplotlib import cm
    from tf_keras_vis.gradcam_plus_plus import GradcamPlusPlus
    from tf_keras_vis.utils.model_modifiers import ReplaceToLinear
    from tf_keras_vis.utils.scores import CategoricalScore

    # Create GradCAM++ object
    gradcam = GradcamPlusPlus(model,
                            model_modifier=ReplaceToLinear(),
                            clone=True)

    # Generate cam with GradCAM++
    cam = gradcam(CategoricalScore(CATEGORICAL_INDEX),
                SEED_INPUT)

    ## Since v0.6.0, calling `normalize()` is NOT necessary.
    # cam = normalize(cam)

    plt.imshow(SEED_INPUT_IMAGE)
    heatmap = np.uint8(cm.jet(cam[0])[..., :3] * 255)
    plt.imshow(heatmap, cmap='jet', alpha=0.5) # overlay

ImportError: cannot import name 'imresize' from 'scipy.misc' (/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/scipy/misc/__init__.py)

## More Drastic Retraining

If we are extra ambitious we can also potentially slice the model even deeper, and take smaller portions to mix with our own models. The farther "into" the model you slice, the more of the original training will be removed and the more the model will learn from our training data. If done, this is a balancing act - we want to keep all of the smarts that the model has gotten from the original training, while getting the benefits of adaptation to our data. 

This is something that is hard to just eyeball - to splice parts of models together and create something that is actually superior likely requries a lot of experimentation, a solid understanding of the model's problem you're addressing, and some domain knowledge. For something like this adaptation of the VGG model, we'd probably start with some idea of what the model was weak at, build an understanding of what types of features it was extracting along the way, and insert our own layers where we think it would be most beneficial. 

In [42]:
## Loading VGG16 model
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(180,180,3))
#base_model.trainable = False ## Not trainable weights
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 180, 180, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 180, 180, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 90, 90, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 90, 90, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 90, 90, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 45, 45, 128)       0     

##### Freeze the First 12 Layers

We will set the first 12 layers to be frozen, and leave the rest open to be trained. 

In [43]:
for layer in base_model.layers[:12]:
    layer.trainable = False
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 180, 180, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 180, 180, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 90, 90, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 90, 90, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 90, 90, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 45, 45, 128)       0     

#### More Retraining

Now we have larger portions of the model that can be trained. We will be losing some of the pretrained knowldge, replacing it with the training coming from our data. If we look at the trainable params above, there are a bunch that are trainable and a bunch that aren't.

We are playing with fire here! Taking away more and more of the "smart" model will be risky for actual performance, we are pretty likely to make things worse as we go father and farther into removing the old training. 

In [44]:
# Add Dense Stuff
flatten_layer = Flatten()
dense_layer_1 = Dense(512, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
dense_layer_2 = Dense(256, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
prediction_layer = Dense(5)

model = Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    dense_layer_2,
    prediction_layer
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 5, 5, 512)         14714688  
                                                                 
 flatten_4 (Flatten)         (None, 12800)             0         
                                                                 
 dense_16 (Dense)            (None, 512)               6554112   
                                                                 
 dense_17 (Dense)            (None, 256)               131328    
                                                                 
 dense_18 (Dense)            (None, 5)                 1285      
                                                                 
Total params: 21,401,413
Trainable params: 18,485,765
Non-trainable params: 2,915,648
_________________________________________________________________


In [45]:
# Model
model.compile(
    optimizer=tf.keras.optimizers.Adam(), 
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=keras.metrics.SparseCategoricalAccuracy(name="accuracy")
)
            
time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/VGG/drastic/" + time_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=True)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath="weights/VGG/drastic/"+time_stamp+"model.hdf5", save_best_only=True, monitor='val_accuracy', mode='max')

model.fit(train_ds,
            epochs=epochs,
            verbose=1,
            validation_data=val_ds,
            callbacks=[tensorboard_callback, stopping_callback, checkpoint_callback])



KeyboardInterrupt: 

#### Results

We likely see worse results when retraining more of the model, that's to be expected. In general, replacing the classifier and possibly some low learning rate fine tuning is the best solution for most cases like this.

## Exercise - ResNet50

This is another pretrained network, containing 50 layers. We can use this one similarly to the last. Try to use transfer learning along with some of your added layers to predict. 

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

def preprocess50(images, labels):
  return tf.keras.applications.resnet50.preprocess_input(images), labels

train_ds = train_ds_orig.map(preprocess50)
val_ds = val_ds_orig.map(preprocess50)

In [None]:
# Make Model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(180,180,3))
base_model.trainable = False ## Not trainable weights

# Add Dense Stuff
flatten_layer = Flatten()
dense_layer_1 = Dense(512, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
dense_layer_2 = Dense(256, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
dense_layer_3 = Dense(96, activation='relu', kernel_regularizer='l2', bias_regularizer='l2')
prediction_layer = Dense(5)

model = Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    dense_layer_2,
    dense_layer_3,
    prediction_layer
])

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 6, 6, 2048)        23587712  
                                                                 
 flatten_8 (Flatten)         (None, 73728)             0         
                                                                 
 dense_20 (Dense)            (None, 50)                3686450   
                                                                 
 dense_21 (Dense)            (None, 20)                1020      
                                                                 
 dense_22 (Dense)            (None, 5)                 105       
                                                                 
Total params: 27,275,287
Trainable params: 

##### Train New Classifier

Train model with new classifier. 

In [None]:
# Model
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            optimizer="adam", 
            metrics=keras.metrics.SparseCategoricalAccuracy(name="accuracy"))
            
log_dir = "logs/50/initial/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath="weights/50/initial/"+time_stamp+"model.hdf5", save_best_only=True, monitor='val_accuracy', mode='max')

model.fit(train_ds,
            epochs=epochs,
            verbose=1,
            validation_data=val_ds,
            callbacks=[tensorboard_callback, stopping_callback, checkpoint_callback])

Epoch 1/2
 1/92 [..............................] - ETA: 18:14 - loss: 2.6699 - accuracy: 0.3125

KeyboardInterrupt: 

##### Attempt Retraining Entire Model to Fine Tune

We can attempt to unlock the model and retrain in fine tuning. 

In [None]:
base_model.trainable = True
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-6),  # Low learning rate
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=keras.metrics.SparseCategoricalAccuracy(name="accuracy")
)

log_dir = "logs/50/fine_tune/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath="weights/50/fine_tune/"+time_stamp+"model.hdf5", save_best_only=True, monitor='val_accuracy', mode='max')

model.fit(train_ds, epochs=epochs, validation_data=val_ds, verbose=1, callbacks=[tensorboard_callback, stopping_callback, checkpoint_callback])

### Transfer Learning Conclusion

Transfer learning is common, especially when working with things like images. Pretrained models that have seen millions upon millions of images get very good at "understanding" what is in an image, or extracting important features from those images. This basic ability to "see" image data is interchangeable between different types of image tasks that we may want to do. For image data, natural language, audio, video, it is likely that one of these large models will be more capable of extracting features from the data than we could ever hope to do from scratch. Since the basics of "seeing a thing" or "reading a sentence" is the same no matter the specific application, that ability to process the data that our pretrained models have can be repurposed to our specific ends. 

We can see lots of scenarios in the real world where people are adapting image recognition models trained by Google to do things like recognize objects in their home security system, or language models like the GPT family being adapted to better understand domain specific language. We'll likely see more of this, as the benefits of training on massive amounts of data are hard, if not impossible, to replicate. 

In [87]:
!pip install keras-vis

Collecting keras-vis
  Downloading keras_vis-0.4.1-py2.py3-none-any.whl (30 kB)
Installing collected packages: keras-vis
Successfully installed keras-vis-0.4.1


In [113]:
from vis.losses import ActivationMaximization
from vis.regularizers import TotalVariation, LPNorm
#from vis.modifiers import Jitter
from vis.optimizer import Optimizer
from vis.utils import utils
from vis.callbacks import GifGenerator
from tensorflow.keras.applications.resnet50 import ResNet50

In [116]:
model = ResNet50(weights='imagenet', include_top=True, input_shape=(180,180,3))
model.trainable = False ## Not trainable weights
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, 180, 180, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 186, 186, 3)  0           ['input_9[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 90, 90, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 90, 90, 64)   256         ['conv1_conv[0][0]']      

In [117]:

layer_name = 'conv5_block3_out'
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
output_class = [1]

losses = [
    (ActivationMaximization(layer_dict[layer_name], output_class), 2),
    (LPNorm(model.input), 5),
    (TotalVariation(model.input), 5)
]
opt = Optimizer(model.input, losses)
opt.minimize(max_iter=500, verbose=True, callbacks=[GifGenerator('opt_progress')])

RuntimeError: tf.gradients is not supported when eager execution is enabled. Use tf.GradientTape instead.

#### Visualize Saliency

In [118]:
layer_idx = utils.find_layer_idx(model, 'conv5_block3_out')

# Swap softmax with linear
model.layers[layer_idx].activation = activations.linear
model = utils.apply_modifications(model)

plt.rcParams['figure.figsize'] = (18, 6)

img1 = utils.load_img('../vggnet/images/ouzel1.jpg', target_size=(224, 224))
img2 = utils.load_img('../vggnet/images/ouzel2.jpg', target_size=(224, 224))

f, ax = plt.subplots(1, 2)
ax[0].imshow(img1)
ax[1].imshow(img2)

NameError: name 'activations' is not defined

In [None]:
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations

# Utility to search for layer index by name. 
# Alternatively we can specify this as -1 since it corresponds to the last layer.
layer_idx = utils.find_layer_idx(model, 'fc1000')

f, ax = plt.subplots(1, 2)
for i, img in enumerate([img1, img2]):    
    # 20 is the imagenet index corresponding to `ouzel`
    grads = visualize_saliency(model, layer_idx, filter_indices=20, seed_input=img)
    
    # visualize grads as heatmap
    ax[i].imshow(grads, cmap='jet')

In [131]:
import matplotlib.cm as cm
from vis.visualization import visualize_cam


penultimate_layer = utils.find_layer_idx(model, 'conv2_block1_1_conv')

for modifier in [None, 'guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 2)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for i, img in enumerate([img1, img2]):    
        # 20 is the imagenet index corresponding to `ouzel`
        grads = visualize_cam(model, layer_idx, filter_indices=20, 
                              seed_input=img, penultimate_layer_idx=penultimate_layer,
                              backprop_modifier=modifier)        
        # Lets overlay the heatmap onto original image.    
        jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
        ax[i].imshow(overlay(jet_heatmap, img))

ImportError: cannot import name 'imresize' from 'scipy.misc' (/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/scipy/misc/__init__.py)