# VGG16 Spatial Action recognition

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from skimage import transform, filter
from skimage.color import rgb2gray
import sys, pylab, operator, csv
import util
import os
import urllib
import imageio
import itertools
import skimage.io
import caffe
caffe.set_mode_gpu()

import visualisation
import transformers
import excitation_backprop
import debug
import beoid

import cnn_utils

In [None]:
# Draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Resolution (dots per inch) applied to every figure created below.
plt.rcParams['figure.dpi'] = 110
# Enable the autoreload extension; mode 2 re-imports all modules before each
# cell is executed, so edits to the local helper modules are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
# Layer names used throughout the notebook.
# 'fc8-1' is the very last layer in the network (pre loss layer).
topLayerName = 'fc8-1'
secondTopLayerName = 'fc7'
# Excitation backprop stops once it reaches this layer.
outputLayerName = 'pool3'

# Locate the deploy definition and the trained weights for the chosen model.
model_root_path = "/home/will/nets/vgg_16_ucf101/"
model_name = "cuhk_action_spatial_vgg_16_split2"
deploy_prototxt_path = os.path.join(model_root_path, model_name + "_deploy.prototxt")
caffemodel_path = os.path.join(model_root_path, model_name + ".caffemodel")

# Instantiate the network in test phase.
net = caffe.Net(deploy_prototxt_path, caffemodel_path, caffe.TEST)

# First top blob produced by each of the two final layers.
topBlobName = net.top_names[topLayerName][0]
secondTopBlobName = net.top_names[secondTopLayerName][0]

In [None]:
# Inspect the loaded network with the project's debug helper
# (presumably reports per-layer filter shapes -- confirm in debug.py).
debug.filter_shapes(net)

In [None]:
def crop(image, size):
    """Return a centred crop of ``image`` with shape ``size``.

    Args:
        image: Array whose first two axes are cropped; any further axes
            (e.g. colour channels) are left untouched by the slicing.
        size: Full shape of the result. ``size[0]`` and ``size[1]`` give the
            extent to keep along axis 0 and axis 1; the cropped data is
            reshaped to ``size``, so its total element count must match.

    Returns:
        The central ``size[0]`` x ``size[1]`` region of ``image``, reshaped
        to ``size``. When the surplus along an axis is odd, the extra
        element is dropped from the far end.

    Raises:
        AssertionError: If ``size`` is larger than ``image`` along axis 0
            or axis 1.
        ValueError: If the cropped data cannot be reshaped to ``size``.
    """
    old_rows, old_cols = image.shape[:2]
    new_rows, new_cols = size[0], size[1]
    assert old_rows >= new_rows, "size[0] exceeds image.shape[0]"
    assert old_cols >= new_cols, "size[1] exceeds image.shape[1]"

    row_start = (old_rows - new_rows) // 2
    col_start = (old_cols - new_cols) // 2

    # Slice with explicit end indices: a negative-offset slice such as
    # ``image[0:-0]`` is empty, which broke the "no crop needed" case, and
    # symmetric offsets remove one element too many for odd surpluses.
    cropped = image[
        row_start:row_start + new_rows,
        col_start:col_start + new_cols,
    ]
    # Reshape to the caller-supplied shape, not a module-level global.
    return cropped.reshape(*size)

In [None]:
# Pick a single frame from the generated ucf101 test frames.
data_root = "/home/will/thesis/generated/ucf101/test-1/frames/"
image_path = os.path.join(data_root, "v_BoxingPunchingBag_g07_c04/frame000005.jpg")
                        
# Load the frame with caffe's image loader and display it in the notebook.
input_image = caffe.io.load_image(image_path)
skimage.io.Image(input_image)

In [None]:
# Crop the loaded frame to 224x224 with 3 channels; the forward-pass cell
# later reshapes the preprocessed image to (1, 3, 224, 224).
new_size = (224, 224, 3)
image = crop(input_image, new_size)

# Display the cropped frame.
skimage.io.Image(image)

In [None]:
# Sanity check: shape of the cropped frame.
image.shape

In [None]:
# Print the value range of each layer's parameters.
for layer, layer_params in net.params.items():
    # Blob 0 is read as the weights and blob 1 as the biases.
    weights, biases = layer_params[0].data, layer_params[1].data

    print(layer)

    print("Biases:  [", np.min(biases), ", ", np.max(biases), "]")
    print("Weights: [", np.min(weights), ", ", np.max(weights), "]")

In [None]:
# Build the excitation-backprop helper from the layer names chosen when the
# network was loaded instead of repeating them as string literals.
eb = excitation_backprop.ExcitationBackprop(
    net, topLayerName, secondTopLayerName, outputLayerName
)

# Preprocess the cropped frame and report the resulting input range.
transformer = transformers.imagenet_transformer(net)
preprocessed_image = transformer.preprocess('data', image)
print("Input min: ", np.min(preprocessed_image))
print("Input max: ", np.max(preprocessed_image))

# Feed the frame as a batch of one and run the network up to the top layer.
net.blobs['data'].data[...] = preprocessed_image.reshape(1, 3, 224, 224)
out = net.forward(end=topLayerName)

# Look the top blob up once; it is read several times below.
top_blob_data = net.blobs[eb.top_blob_name].data
print("Output layer max:", np.max(top_blob_data))
print("Output layer min:", np.min(top_blob_data))

class_count = 101
# Pre-softmax scores: one value per class, taking the maximum over any
# remaining positions of the top blob.
scores = top_blob_data[0].reshape(class_count, -1).max(1).flatten()
# Indices of the three highest-scoring classes, best first.
class_ids = scores.argsort()[-3:][::-1]

In [None]:
# Blob names in the order net.blobs yields them. Materialised as a list so
# `layers` stays reusable: a bare map() is a one-shot iterator on Python 3.
layers = list(net.blobs)
# Names containing 'pool', in reverse of that order.
pooling_layers = [layer for layer in reversed(layers) if 'pool' in layer]

In [None]:
def image_grid(images, square=True):
    """Show ``images`` on a grid of matplotlib axes with ticks and spines hidden.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``. It is
            expected to be non-empty (a zero-sized subplot grid is
            presumably rejected by matplotlib).
        square: When true, lay the images out on an N x N grid where N is
            the ceiling of the square root of the image count; otherwise
            use a single row with one column per image.

    Returns:
        A ``(fig, axes)`` tuple. ``axes`` is always a numpy array of Axes,
        including the single-image case.
    """
    image_count = len(images)
    if square:
        width = int(np.ceil(np.sqrt(image_count)))
        height = width
    else:
        width = image_count
        height = 1

    fig, axes = plt.subplots(height, width)
    # plt.subplots squeezes a 1x1 grid down to a bare Axes object, which has
    # no ``.flat``; wrap it so the single-image case works like the others.
    axes = np.atleast_1d(axes)

    for ax, image in zip(axes.flat, images):
        ax.imshow(image)

    # Strip decorations from every cell, including ones left without an image.
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

    return (fig, axes)

def attention_map_grid(image, attention_maps, labels=None, square=True):
    """Overlay each attention map on ``image`` and show the results on a grid.

    Args:
        image: Base image passed to ``visualisation.overlay_attention_map``.
        attention_maps: Iterable of attention maps, one per grid cell.
        labels: Optional iterable of x-axis labels, paired with the grid's
            axes in order.
        square: Forwarded to ``image_grid`` to choose the layout.

    Returns:
        The ``(fig, axes)`` tuple produced by ``image_grid``.
    """
    overlays = [
        visualisation.overlay_attention_map(image, attention_map)
        for attention_map in attention_maps
    ]
    fig, axes = image_grid(overlays, square=square)
    if labels is None:
        return (fig, axes)

    for ax, label in zip(axes.flat, labels):
        ax.set_xlabel(label)
    return (fig, axes)

# Compute one attention map per pooling layer for the top-scoring class.
# The top two layer names come from the constants defined when the network
# was loaded rather than being repeated as literals.
attention_maps = []
for pooling_layer in pooling_layers:
    eb = excitation_backprop.ExcitationBackprop(
        net, topLayerName, secondTopLayerName, pooling_layer
    )
    attention_maps.append(eb.backprop(class_ids[0]))

# Show the maps side by side over the greyscale frame, labelled by layer name,
# and save the figure as a PDF.
(fig, _) = attention_map_grid(
    rgb2gray(image), attention_maps, labels=pooling_layers, square=False
)
fig.set_size_inches(7, 2)
fig.savefig("ebp-pooling-layer-sizes.pdf")