# MNIST test accuracy    SVM: 92.450%  LR: 91.230%
# MNIST, first 16 principal components, test accuracy    SVM: 88.335%  LR: 82.710%
# MNIST, first 256 principal components, test accuracy    SVM: 92.795%  LR: 90.850%

In [1]:
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import sklearn
from sklearn.linear_model import LogisticRegression as LR


Using gpu device 0: GeForce GTX 750 Ti (CNMeM is disabled, cuDNN Version is too old. Update to v5, was 3007.)


In [2]:
from mnist import iterate_minibatches, load_dataset

In [3]:
# Unpack the six arrays returned by load_dataset(); the cells shown below
# read only the train and test splits.
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = load_dataset()

In [4]:
def build_cnn(input_var=None, num_conv=32, mid_neurons=256,
              initializer=lasagne.init.Normal,
              nonlinearity=lasagne.nonlinearities.rectify):
    """Assemble a two-stage convolution/pooling network for 1x28x28 inputs.

    Layer stack, in construction order:

        InputLayer (None, 1, 28, 28)
        -> Conv2DLayer, 5x5, ``num_conv`` filters      -> MaxPool2DLayer 2x2
        -> Conv2DLayer, 5x5, ``2 * num_conv`` filters  -> MaxPool2DLayer 2x2
        -> DenseLayer, ``mid_neurons`` units
        -> DenseLayer, 10 units, softmax

    No dropout layers are inserted anywhere in the stack.

    Parameters
    ----------
    input_var : optional
        Forwarded unchanged to ``InputLayer(input_var=...)``.
    num_conv : int
        Filter count of the first convolution; the second uses twice as many.
    mid_neurons : int
        Number of units in the hidden dense layer.
    initializer : callable
        Called with no arguments once for each convolution and for the hidden
        dense layer; the result is passed as that layer's ``W``. The output
        layer does not receive it and keeps the library's default ``W``.
    nonlinearity : callable
        Nonlinearity for both convolutions and the hidden dense layer.

    Returns
    -------
    The final (softmax) ``DenseLayer``.
    """
    layers = lasagne.layers

    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)

    # Two identical conv -> pool stages that differ only in filter count.
    for n_filters in (num_conv, 2 * num_conv):
        net = layers.Conv2DLayer(
            net,
            num_filters=n_filters,
            filter_size=(5, 5),
            nonlinearity=nonlinearity,
            W=initializer(),
        )
        net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Hidden fully-connected layer.
    net = layers.DenseLayer(
        net,
        num_units=mid_neurons,
        nonlinearity=nonlinearity,
        W=initializer(),
    )

    # 10-way softmax output layer.
    return layers.DenseLayer(
        net,
        num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax,
    )

# CNN baseline: 99%

In [5]:
# Sizes handed to build_cnn by RandomCNN.experiment:
#   num_conv    - filters in the first convolution (the second uses 2 * num_conv)
#   mid_neurons - units in the hidden dense layer
num_conv, mid_neurons = 32, 256

In [6]:
def train_and_eval(model, train_x, train_y, test_x, test_y):
    """Fit ``model`` on the training split and return its test accuracy.

    Parameters
    ----------
    model
        Any object exposing ``fit(X, y)`` and ``predict(X)``.
    train_x, train_y
        Passed unchanged to ``model.fit``.
    test_x
        Passed unchanged to ``model.predict``.
    test_y : array-like
        True labels for ``test_x``; flattened to 1-D before comparison.

    Returns
    -------
    float
        Fraction of test samples whose prediction equals the label, in [0, 1].

    Raises
    ------
    ValueError
        If ``test_y`` is empty, or if the number of predictions differs from
        the number of labels.
    """
    model.fit(train_x, train_y)

    # Flatten both sides so an (n, 1) label column cannot broadcast against
    # (n,) predictions into an (n, n) comparison matrix.
    predicted = np.asarray(model.predict(test_x)).ravel()
    expected = np.asarray(test_y).ravel()

    if expected.size == 0:
        raise ValueError("test_y is empty; accuracy is undefined")
    if predicted.shape != expected.shape:
        raise ValueError(
            "got {} predictions for {} labels".format(predicted.size, expected.size)
        )

    # np.mean always performs true division, so the result does not depend
    # on the interpreter's integer-division rules.
    return float(np.mean(predicted == expected))

class RandomCNN(object):
    """Score linear classifiers on features taken from freshly built CNNs.

    Each ``experiment`` call builds a new network with ``build_cnn`` (no
    training step is run on it), reads the activations of its second-to-last
    layer for the train and test images, fits a logistic regression and a
    linear SVM on those activations, and records both test accuracies.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # One accuracy (fraction in [0, 1]) is appended per experiment() call,
        # in call order; the configuration used is not stored alongside it.
        self.svm_acc = []
        self.lr_acc = []

    @staticmethod
    def _extract_features(feature_fn, data, batch_size=500):
        """Apply ``feature_fn`` to ``data`` in order, ``batch_size`` rows at a time.

        Rows are processed as consecutive slices, so row ``k`` of the result
        corresponds to row ``k`` of ``data``. A final batch shorter than
        ``batch_size`` is processed as well.

        Returns a float64 array whose first dimension equals ``len(data)``.

        Raises ValueError for an empty ``data`` or a ``batch_size`` below 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        n_rows = len(data)
        if n_rows == 0:
            raise ValueError("cannot extract features from an empty dataset")
        chunks = [
            np.asarray(feature_fn(data[start:start + batch_size]), dtype=np.float64)
            for start in range(0, n_rows, batch_size)
        ]
        return np.concatenate(chunks, axis=0)

    def experiment(self, initializer, nonlinearity, batch_size=500):
        """Build one network, extract features, and record LR / SVM accuracy.

        Parameters
        ----------
        initializer, nonlinearity
            Forwarded to ``build_cnn``.
        batch_size : int, optional
            Number of images per call to the compiled feature function.

        Reads the notebook globals ``num_conv``, ``mid_neurons``, ``X_train``,
        ``y_train``, ``X_test`` and ``y_test``. Appends one value to
        ``self.lr_acc`` and one to ``self.svm_acc`` and prints both as
        percentages.

        NOTE(review): features are passed to the classifiers unscaled. The
        recorded runs that print the same accuracy for every trial look like
        constant predictions; standardising the features may be worth
        trying. Confirm before relying on those numbers.
        """
        # Imported here so the class does not depend on sklearn.svm having
        # been loaded as a side effect of another sklearn import.
        from sklearn.svm import SVC

        input_var = T.tensor4('inputs')
        network = build_cnn(input_var, num_conv, mid_neurons, initializer, nonlinearity)

        # The layer just before the output layer provides the features.
        feature_layer = lasagne.layers.get_all_layers(network)[-2]
        feature = lasagne.layers.get_output(feature_layer, deterministic=True)
        feature_fn = theano.function([input_var], feature)

        train_data = self._extract_features(feature_fn, X_train, batch_size)
        test_data = self._extract_features(feature_fn, X_test, batch_size)

        lr_accuracy = train_and_eval(LR(C=1), train_data, y_train, test_data, y_test)
        self.lr_acc.append(lr_accuracy)

        svm = SVC(kernel='linear', C=1, shrinking=False)
        svm_accuracy = train_and_eval(svm, train_data, y_train, test_data, y_test)
        self.svm_acc.append(svm_accuracy)

        print("lr accuracy:\t\t{:.2f} %  svm accuracy:\t\t{:.2f} %".format(
            100 * lr_accuracy, 100 * svm_accuracy))
        

In [7]:
# Create the shared result collector, then run five trials with
# Normal-initialised weights and the rectify nonlinearity.
randc = RandomCNN()
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Normal,
        nonlinearity=lasagne.nonlinearities.rectify,
    )

lr accuracy:		47.80 %  svm accuracy:		11.35 %
lr accuracy:		29.06 %  svm accuracy:		11.35 %
lr accuracy:		44.71 %  svm accuracy:		11.35 %
lr accuracy:		43.08 %  svm accuracy:		11.35 %
lr accuracy:		40.81 %  svm accuracy:		11.35 %


In [8]:
# Five trials: Uniform-initialised weights, rectify nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Uniform,
        nonlinearity=lasagne.nonlinearities.rectify,
    )

lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %


In [9]:
# Five trials: Orthogonal-initialised weights, rectify nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Orthogonal,
        nonlinearity=lasagne.nonlinearities.rectify,
    )

lr accuracy:		94.92 %  svm accuracy:		96.13 %
lr accuracy:		95.24 %  svm accuracy:		96.63 %
lr accuracy:		94.79 %  svm accuracy:		96.36 %
lr accuracy:		95.50 %  svm accuracy:		96.67 %
lr accuracy:		94.94 %  svm accuracy:		96.26 %


In [10]:
# Five trials: Normal-initialised weights, sigmoid nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Normal,
        nonlinearity=lasagne.nonlinearities.sigmoid,
    )

lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %


In [11]:
# Five trials: Uniform-initialised weights, sigmoid nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Uniform,
        nonlinearity=lasagne.nonlinearities.sigmoid,
    )

lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %
lr accuracy:		11.35 %  svm accuracy:		11.35 %


In [12]:
# Five trials: Orthogonal-initialised weights, sigmoid nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Orthogonal,
        nonlinearity=lasagne.nonlinearities.sigmoid,
    )

lr accuracy:		83.73 %  svm accuracy:		88.69 %
lr accuracy:		83.21 %  svm accuracy:		88.10 %
lr accuracy:		84.00 %  svm accuracy:		89.33 %
lr accuracy:		83.82 %  svm accuracy:		88.68 %
lr accuracy:		82.23 %  svm accuracy:		88.49 %


In [None]:
# Five trials: Normal-initialised weights, tanh nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Normal,
        nonlinearity=lasagne.nonlinearities.tanh,
    )

lr accuracy:		68.63 %  svm accuracy:		46.55 %
lr accuracy:		69.41 %  svm accuracy:		46.23 %
lr accuracy:		71.46 %  svm accuracy:		63.50 %
lr accuracy:		72.17 %  svm accuracy:		60.17 %
lr accuracy:		70.87 %  svm accuracy:		59.27 %


In [None]:
# Five trials: Uniform-initialised weights, tanh nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Uniform,
        nonlinearity=lasagne.nonlinearities.tanh,
    )

In [None]:
# Five trials: Orthogonal-initialised weights, tanh nonlinearity.
for trial in range(5):
    randc.experiment(
        initializer=lasagne.init.Orthogonal,
        nonlinearity=lasagne.nonlinearities.tanh,
    )