# Example using pretrained ResNet 50-layer model

In [1]:
import os, urllib
def download(url):
    """Download ``url`` into the current directory unless it is already there.

    The local file name is the last ``/``-separated component of ``url``.
    If a path with that name already exists, nothing is fetched.

    Args:
        url: Address of the file to fetch.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly before using it.
    import urllib.request

    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)
def get_model(prefix, epoch):
    """Fetch the two files that make up a saved model checkpoint.

    Downloads ``<prefix>-symbol.json`` first and then
    ``<prefix>-NNNN.params``, where ``NNNN`` is ``epoch`` zero-padded to
    four digits.

    Args:
        prefix: URL prefix shared by both checkpoint files.
        epoch: Integer epoch number used in the parameter file name.
    """
    symbol_url = prefix + '-symbol.json'
    download(symbol_url)

    params_suffix = '-%04d.params' % (epoch,)
    download(prefix + params_suffix)

# Fetch the ResNet-50 symbol file and the epoch-0 parameter file into the
# current working directory.
get_model('http://data.mxnet.io/models/imagenet/resnet/50-layers/resnet-50', 0)

In [2]:
import mxnet as mx
# Load the checkpoint saved under the prefix 'resnet-50' at epoch 0: the
# network symbol plus its argument and auxiliary parameters.
sym, arg_params, aux_params = mx.model.load_checkpoint('resnet-50', 0)

Both argument parameters and auxiliary parameters (e.g. mean/std in a batch normalization layer) are stored as a dictionary mapping a string name to an ndarray value (see ndarray.ipynb). The argument parameters consist of weights and biases.

In [18]:
#arg_params

## Extract Features
Sometimes we want the internal outputs of a neural network rather than the final predicted probabilities. In this way, the neural network works as a feature extraction module for other applications.

By default, a loaded symbol returns only the last layer as output. But we can get all internal layers with get_internals, which returns a new symbol outputting all internal layers. The following code prints the names of nine of the last ten outputs (the slice `[-10:-1]` leaves out the final one).

We can also use mx.viz.plot_network(sym) to visually find the name of the layer we want to use. The naming convention for an output is the layer name with _output as the suffix.

In [3]:
# Build a symbol that exposes every internal output of the network.
all_layers = sym.get_internals()
# Show output names near the end of the network. The slice [-10:-1] stops
# before the final entry, so nine names are displayed.
all_layers.list_outputs()[-10:-1]

['bn1_moving_var',
 'bn1_output',
 'relu1_output',
 'pool1_output',
 'flatten0_output',
 'fc1_weight',
 'fc1_bias',
 'fc1_output',
 'softmax_label']

Often we want to use the output before the last fully connected layer, which may capture semantic features of the raw images without yet being fitted too closely to the labels. In the ResNet case, it is the flatten layer named flatten0, just before the last fully connected layer. The following code builds a new symbol sym3 that uses the flatten layer as its last output layer, and initializes a new module.


In [3]:
# Re-derive the internals symbol and select the flatten layer's output as
# the output of the new symbol.
all_layers = sym.get_internals()
sym3 = all_layers['flatten0_output']
# No label input is declared (label_names=None); the module uses the CPU context.
mod3 = mx.mod.Module(symbol=sym3, label_names=None, context=mx.cpu())
# Bind for inference only, with one input named 'data' of shape (1, 3, 224, 224).
mod3.bind(for_training=False, data_shapes=[('data', (1,3,224,224))])
# Hand the loaded argument and auxiliary parameters to the module.
mod3.set_params(arg_params, aux_params)

## Prepare data

We first obtain the synset file, in which the i-th line contains the label for the i-th class.

In [4]:
import os.path

# Fetch the class-name list only when there is no local copy yet.
name = "synset.txt"
if os.path.isfile(name):
    pass
else:
    download('http://data.mxnet.io/models/imagenet/resnet/synset.txt')

# One entry per line of the file, with trailing whitespace removed.
with open('synset.txt') as f:
    synsets = [line.rstrip() for line in f]

We next download 1000 images for testing, which were not used for the training. 

In [None]:
import tarfile

# Fetch the archive of validation images unless it is already present.
name = "val_1000.tar"
if not os.path.isfile(name):
    download('http://data.mxnet.io/data/val_1000.tar')

# Unpack only when the label file is missing, i.e. the archive has not been
# extracted into val_1000/ yet.
if not os.path.isfile("val_1000/label"):
    # The context manager closes the archive handle even if extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use it on archives from a trusted source.
    with tarfile.open('val_1000.tar') as tfile:
        tfile.extractall()

# For every line of the label file, the text before the first tab is parsed
# as the integer class index.
with open('val_1000/label') as f:
    val_label = [int(line.split('\t')[0]) for line in f]

Visualize the first 8 images.

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import cv2

# Resolution used when figures are saved.
matplotlib.rc("savefig", dpi=100)

# Lay the first eight validation images out on a 2x4 grid, each titled with
# its class name.
for i in range(8):
    img = cv2.imread('val_1000/%d.jpg' % (i,))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # b,g,r -> r,g,b for display
    plt.subplot(2, 4, i + 1)
    plt.imshow(img)
    plt.axis('off')
    # Keep the text before the first comma, then drop its first
    # space-separated token.
    label = synsets[val_label[i]].split(',')[0]
    label = ' '.join(label.split(' ')[1:])
    plt.title(label)

Next we define a function that reads one image at a time and converts it to a format that can be used by the model. Here we use a naive approach that resizes the original image to the desired shape and changes the data layout.

In [None]:
import numpy as np
import cv2
def get_image(filename):
    """Read an image file and lay it out as a single-example model input.

    The image is converted from b,g,r to r,g,b channel order, resized to
    224*224, rearranged to (channel, height, width) and given a leading
    example axis of length one.

    Args:
        filename: Path of the image file to read.

    Returns:
        Array laid out as (example, channel, height, width).
    """
    bgr = cv2.imread(filename)  # pixels arrive in b,g,r order
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))  # 224*224 to fit model
    # (height, width, channel) -> (channel, height, width) in one step.
    chw = np.transpose(resized, (2, 0, 1))
    return chw[np.newaxis, :]

Finally we define an input data structure that is accepted by mxnet. The field `data` is used for the input data, which is a list of NDArrays.

In [None]:
from collections import namedtuple
# Minimal batch container with a single field, `data`.
Batch = namedtuple('Batch', ['data'])

Now we can do feature extraction by calling forward on the new module. Notice that the last convolution layer uses 2048 channels, and we then perform an average pooling, so the output size of the flatten layer is 2048.



In [None]:
# Load one validation image and convert it to the model's input layout.
img = get_image('val_1000/1.jpg')

In [None]:
# Run a forward pass on the single image, wrapped in a Batch.
mod3.forward(Batch([mx.nd.array(img)]))
# Take the first output and copy it into a NumPy array.
out = mod3.get_outputs()[0].asnumpy()
print(out.shape)
# Bare expression so the notebook displays the array as the cell result.
out

## Add a new output layer with new classes

Set context.

In [None]:
from mxnet import nd, autograd, gluon


# Use a GPU when at least one is reported, otherwise fall back to the CPU.
if mx.test_utils.list_gpus():
    ctx = mx.gpu()
else:
    ctx = mx.cpu()

# Data and model share the same device.
data_ctx = model_ctx = ctx

Allocate parameters for the output layer.

In [None]:
#######################
#  Allocate parameters for the output layer
#######################
num_input = 2048    # length of the feature vector fed into the new layer
num_outputs = 2     # presumably the number of new classes; confirm for your task
weight_scale = .01  # passed as `scale` to the random initialiser

# Weight matrix mapping num_input features to num_outputs scores.
Wlast = nd.random_normal(shape=(num_input, num_outputs), scale=weight_scale, ctx=model_ctx)
# One bias per output unit, so its length is num_outputs rather than num_input.
blast = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=model_ctx)

params = [Wlast, blast]

In [None]:
def softmax(y_linear):
    """Turn raw scores into normalised exponentials.

    The maximum of ``y_linear`` (``nd.max`` called without an axis
    argument) is subtracted before exponentiating. Each exponential is
    then divided by a partition term computed with ``nd.nansum`` over
    ``axis=0, exclude=True`` and reshaped to a column.

    Args:
        y_linear: NDArray of raw scores; assumed to be laid out as
            (example, class) -- TODO confirm against the caller.

    Returns:
        NDArray of the same shape as ``y_linear``.
    """
    shifted = y_linear - nd.max(y_linear)
    exp = nd.exp(shifted)
    totals = nd.nansum(exp, axis=0, exclude=True)
    return exp / totals.reshape((-1, 1))

In [None]:
def naic(cnn_feature_vector):
    """Compute the raw (pre-softmax) scores of the new output layer.

    Applies the linear map ``features . Wlast + blast``. A matrix product
    is required here: an element-wise ``*`` between the features and
    ``Wlast`` does not implement a linear layer.

    Args:
        cnn_feature_vector: NDArray of extracted features; assumed to be
            shaped (example, num_input) so that it can be multiplied with
            ``Wlast`` -- TODO confirm against the caller.

    Returns:
        NDArray of linear scores; softmax is not applied here.
    """
    # let's apply the softmax later
    # NOTE(review): assumes blast broadcasts against the (example, output)
    # result of the product -- confirm its shape.
    return nd.dot(cnn_feature_vector, Wlast) + blast

