In [1]:
import os
import sys
import gzip
import time
import pickle
import datetime
import random
import numpy as np
import pandas as pd

import theano
from theano import tensor as T

import lasagne
from lasagne.updates import nesterov_momentum, adam
from lasagne.layers import helper
from lasagne.layers import get_output, InputLayer, DenseLayer, Upscale2DLayer, ReshapeLayer
from utils import load_pickle_data_test, load_pickle_data_cv

variant = 'wide'
depth = 3
width = 8

Using gpu device 0: TITAN X (Pascal) (CNMeM is disabled, cuDNN 5105)


In [2]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


In [3]:
class LogisticRegression(object):
    def __init__(self, X, y, num_units=10):
        self.X = X
        self.y = y
        self.input_var = T.matrix('inputs')
        self.target_var = T.ivector('targets')
        self.l_in = InputLayer(shape=(None, X.shape[1]), input_var=self.input_var)
        self.l_out = DenseLayer(self.l_in, num_units=num_units, nonlinearity=lasagne.nonlinearities.softmax)
        self.prediction = lasagne.layers.get_output(self.l_out)
        self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var)
        self.loss = self.loss.mean()
        self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
        self.updates = lasagne.updates.adadelta(self.loss, self.params, learning_rate=1)
        
        self.test_prediction = lasagne.layers.get_output(self.l_out, deterministic=True)
        self.test_loss = lasagne.objectives.categorical_crossentropy(self.test_prediction,
                                                            self.target_var)
        self.test_loss = self.test_loss.mean()
        self.test_acc = T.mean(T.eq(T.argmax(self.test_prediction, axis=1), self.target_var),
                      dtype=theano.config.floatX)
        self.train_fn = theano.function([self.input_var, self.target_var],
                                        self.loss, updates=self.updates)
        self.acc_fn = theano.function([self.input_var, self.target_var], 
                                      self.test_acc)
        self.train()
        
    def train(self, num_epochs=50, batch_size=1000):
        for epoch in range(num_epochs):
            train_err = 0
            train_batches = 0
            for batch in iterate_minibatches(self.X, self.y, batch_size, shuffle=False):
                inputs, targets = batch
                train_err += self.train_fn(inputs, targets)
                train_batches += 1

    def eval_acc(self, X_test, y_test):
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, 1000, shuffle=False):
            inputs, targets = batch
            test_acc += self.acc_fn(inputs, targets)
            test_batches += 1
        self.test_acc = test_acc/test_batches
        print('overall acc is: {}'.format(self.test_acc))

In [4]:
if variant == 'normal':
    from models import ResNet_FullPreActivation as ResNet
elif variant == 'bottleneck':
    from models import ResNet_BottleNeck_FullPreActivation as ResNet
elif variant == 'wide':
    from models import ResNet_FullPre_Wide as ResNet
else:
    print ('Unsupported model %s' % variant)


In [5]:
BATCHSIZE = 1

'''
Set up all theano functions
'''
X = T.tensor4('X')
Y = T.ivector('y')

# set up theano functions to generate output by feeding data through network, any test outputs should be deterministic
# load model
if width > 1:
    output_layer = ResNet(X, n=depth, k=width)
else:
    output_layer = ResNet(X, n=depth)
output_test = lasagne.layers.get_output(output_layer, deterministic=True)

output_class = T.argmax(output_test, axis=1)

# set up training and prediction functions
predict_proba = theano.function(inputs=[X], outputs=output_test)
predict_class = theano.function(inputs=[X], outputs=output_class)

'''
Load data and make predictions
'''
test_X, test_y = load_pickle_data_test()

# load network weights
f = gzip.open('weights/%s%d_resnet.pklz'%(variant,depth), 'rb')
all_params = pickle.load(f, encoding='latin1')
f.close()
helper.set_all_param_values(output_layer, all_params)

#make predictions
pred_labels = []
for j in range((test_X.shape[0] + BATCHSIZE - 1) // BATCHSIZE):
    sl = slice(j * BATCHSIZE, (j + 1) * BATCHSIZE)
    X_batch = test_X[sl]
    pred_labels.extend(predict_class(X_batch))

pred_labels = np.array(pred_labels)
print(pred_labels.shape)

'''
Compare differences
'''
same = 0
for i in range(pred_labels.shape[0]):
    if test_y[i] == pred_labels[i]:
        same += 1

print('Accuracy on the testing set, ', (float(same) / float(pred_labels.shape[0])))

(10000,)
Accuracy on the testing set,  0.9542


In [7]:
layer_list = lasagne.layers.get_all_layers(output_layer)
residual_list = [layer for layer in layer_list if isinstance(layer, lasagne.layers.merge.ElemwiseSumLayer)]

In [8]:
residual_list

[<lasagne.layers.merge.ElemwiseSumLayer at 0x7f5630786fd0>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f5630795fd0>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f56307a3048>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f563072b1d0>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f56307351d0>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f563073d208>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f5630744240>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f563074e278>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f5630754400>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f563075d400>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f5630766438>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f56306ee470>,
 <lasagne.layers.merge.ElemwiseSumLayer at 0x7f56306f84a8>]

In [9]:
len(residual_list)

13

In [21]:
test_list = [residual_list[2], residual_list[7], residual_list[12]]

In [23]:
output_list = lasagne.layers.get_output(test_list)
output_fn = theano.function([X], output_list)

In [24]:
train_X, valid_X, train_y, valid_y = load_pickle_data_cv()

[[[ 130.59715271  130.01402283  130.915802   ...,  131.06472778
    130.38645935  130.18037415]
  [ 129.99595642  129.21817017  130.07235718 ...,  130.04629517
    129.33551025  129.18457031]
  [ 129.65206909  128.61184692  129.34715271 ...,  129.20292664
    128.58711243  128.63717651]
  ..., 
  [ 126.54090881  124.57240295  123.92199707 ...,  124.16797638
    124.59146881  125.38682556]
  [ 127.05977631  125.47117615  125.29917908 ...,  125.40764618
    125.47453308  125.90288544]
  [ 127.75404358  126.63544464  126.93002319 ...,  126.72489166
    126.48709106  126.57411194]]

 [[ 136.01364136  135.38494873  136.17668152 ...,  136.37138367
    135.76266479  135.53540039]
  [ 135.25456238  134.39224243  135.0798645  ...,  135.20358276
    134.62319946  134.46524048]
  [ 134.64434814  133.45162964  133.97103882 ...,  134.01547241
    133.59587097  133.65266418]
  ..., 
  [ 125.99935913  123.7685318   122.84073639 ...,  122.9366684
    123.65466309  124.74809265]
  [ 126.4221344   124.6

In [25]:
train_X.shape

(45000, 3, 32, 32)

In [53]:
batchsize = 500
feature_list = [None,None,None]
for batch in iterate_minibatches(train_X, train_y, batchsize):
    inputs, _ = batch
    temp = output_fn(inputs)
    for i in range(3):
        if feature_list[i] == None:
            feature_list[i] = temp[i]
        else:
            feature_list[i] = np.concatenate((feature_list[i], temp[i]))



In [54]:
feature_list[0].shape

(45000, 128, 32, 32)

In [55]:
feature_list[1].shape

(45000, 256, 16, 16)

In [56]:
feature_list[2].shape

(45000, 512, 8, 8)

In [58]:
np.savez('./learned_features/features', feature_list[0], feature_list[1], feature_list[2])

In [77]:
train_feature = feature_list[2].reshape(45000,-1)

In [81]:
lr = LogisticRegression(train_feature, train_y)

In [80]:
batchsize = 500
test_feature = None
for batch in iterate_minibatches(test_X, test_y, batchsize):
    inputs, _ = batch
    temp = output_fn(inputs)
    if test_feature == None:
        test_feature = temp[2]
    else:
        test_feature = np.concatenate((test_feature, temp[2]))



In [82]:
lr.eval_acc(test_feature.reshape(10000,-1), test_y)

overall acc is: 0.9453000009059906
