# MNIST test accuracy    SVM: 92.450%  LR: 91.230%
# MNIST first 16 principal components test accuracy    SVM: 88.335%  LR: 82.710%
# MNIST first 256 principal components test accuracy    SVM: 92.795%  LR: 90.850%

In [28]:
import os
import sys
import time

import lasagne
import numpy as np
import sklearn
import theano
import theano.tensor as T
from sklearn.linear_model import LogisticRegression as LR
from sklearn.svm import SVC


In [2]:
from mnist import *

In [3]:
# Unpack the six arrays returned by load_dataset(): images and labels for the
# training, validation and test splits, in that order.
# load_dataset presumably comes from the star-imported mnist module -- confirm.
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

In [9]:
# Sanity check: show the array shape of each image split.
print(X_train.shape, X_val.shape, X_test.shape)

(50000, 1, 28, 28) (10000, 1, 28, 28) (10000, 1, 28, 28)


In [7]:
# Sanity check: the training labels form a flat vector (see the output below).
y_train.shape

(50000,)

# CNN baseline: 99.13% when num_conv = 32 and mid_neurons = 256

In [63]:
# Network-size settings. Cells that call build_cnn(input_var, num_conv,
# mid_neurons) read these two names as module-level globals.
num_conv, mid_neurons = 32, 256

In [64]:
# Run the training entry point with the 'cnn' model; the log below shows it
# loading the data, compiling the functions and training for up to 500 epochs.
# NOTE(review): main() is presumably defined in mnist.py (star import); whether
# it picks up the num_conv / mid_neurons values set in this notebook depends on
# that module -- confirm.
main(model='cnn')

Loading data...
Building model and compiling functions...
Starting training...
Epoch 1 of 500 took 9.533s
  training loss:		0.455041
  validation loss:		0.143414
  validation accuracy:		96.02 %
Epoch 2 of 500 took 9.462s
  training loss:		0.124603
  validation loss:		0.096964
  validation accuracy:		97.29 %
Epoch 3 of 500 took 9.131s
  training loss:		0.087163
  validation loss:		0.076745
  validation accuracy:		97.85 %
Epoch 4 of 500 took 9.132s
  training loss:		0.070961
  validation loss:		0.067336
  validation accuracy:		98.12 %
Epoch 5 of 500 took 9.444s
  training loss:		0.059511
  validation loss:		0.063042
  validation accuracy:		98.24 %
Epoch 6 of 500 took 9.585s
  training loss:		0.051938
  validation loss:		0.055336
  validation accuracy:		98.41 %
Epoch 7 of 500 took 9.132s
  training loss:		0.046074
  validation loss:		0.053247
  validation accuracy:		98.49 %
Epoch 8 of 500 took 9.131s
  training loss:		0.041804
  validation loss:		0.050682
  validation accuracy:		98.56 %
E

In [45]:
def train_and_eval( model, train_x, train_y, test_x, test_y ):
    """Fit ``model`` on the training split and return its test accuracy.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
    train_x, train_y : training samples and labels, passed straight to ``fit``
    test_x : samples passed to ``predict``
    test_y : reference labels (1-D), compared element-wise with the predictions

    Returns
    -------
    float
        Fraction of test samples whose predicted label equals ``test_y``.

    Raises
    ------
    ValueError
        If the predictions and ``test_y`` do not have the same shape.
    ZeroDivisionError
        If ``test_y`` is empty.
    """
    model.fit( train_x, train_y )
    predicted = np.asarray( model.predict( test_x ) )
    expected = np.asarray( test_y )
    if predicted.shape != expected.shape:
        # Fail loudly instead of letting broadcasting produce a bogus score.
        raise ValueError(
            "prediction shape {} does not match label shape {}".format(
                predicted.shape, expected.shape))
    # One vectorised count; the builtin sum() would walk the boolean array
    # one Python object at a time.
    n_correct = int( np.count_nonzero( expected == predicted ) )
    return n_correct / len(test_y)

class RandomCNN(object):
    """Score linear classifiers on features taken from an untrained CNN.

    Each call to ``experiment`` builds a new network with ``build_cnn`` (no
    training step is run on it here), reads the activations of its
    second-to-last layer for the train and test images, fits a logistic
    regression and a linear SVM on them, and records both test accuracies.

    Attributes are plain lists so the object can be pickled:
    ``lr_acc`` and ``svm_acc`` hold one accuracy (fraction in [0, 1]) per run.

    The method reads these module-level names: ``num_conv``, ``mid_neurons``,
    ``X_train``, ``y_train``, ``X_test``, ``y_test``, ``build_cnn``,
    ``iterate_minibatches`` and ``train_and_eval``.
    """

    def __init__(self):
        self.svm_acc = []
        self.lr_acc = []

    @staticmethod
    def _extract_features(feature_fn, inputs, labels, width, batch_size=500):
        """Run ``feature_fn`` over ``inputs`` batch by batch into one array.

        Rows are written at a running offset, so the result lines up with
        ``inputs`` as long as ``iterate_minibatches`` yields batches in order.
        NOTE(review): if ``iterate_minibatches`` drops a trailing partial
        batch, the corresponding rows stay zero -- confirm in mnist.py.
        """
        features = np.zeros((len(inputs), width))
        offset = 0
        for batch_inputs, _ in iterate_minibatches(inputs, labels, batch_size,
                                                   shuffle=False):
            batch_features = feature_fn(batch_inputs)
            stop = offset + len(batch_features)
            features[offset:stop, :] = batch_features
            offset = stop
        return features

    def experiment(self):
        """Build a new network, extract features, and score LR and linear SVM.

        Appends one accuracy to ``self.lr_acc`` and one to ``self.svm_acc``
        and prints both as percentages. Returns ``None``.
        """
        input_var = T.tensor4('inputs')
        network = build_cnn(input_var, num_conv, mid_neurons)
        # Second-to-last layer of the network; its output width is taken to be
        # mid_neurons (that is how the feature arrays are sized).
        feature_layer = lasagne.layers.get_all_layers(network)[-2]
        feature = lasagne.layers.get_output(feature_layer, deterministic=True)
        feature_fn = theano.function([input_var], feature)

        train_data = self._extract_features(
            feature_fn, X_train, y_train, mid_neurons)
        test_data = self._extract_features(
            feature_fn, X_test, y_test, mid_neurons)

        lr_acc = train_and_eval( LR(C=1), train_data, y_train,
            test_data, y_test )
        self.lr_acc.append(lr_acc)

        # SVC is imported explicitly: ``import sklearn`` alone does not
        # guarantee that the ``sklearn.svm`` submodule is loaded.
        svm_acc = train_and_eval( SVC(kernel='linear',C=1,shrinking=False),
            train_data, y_train, test_data, y_test )
        self.svm_acc.append(svm_acc)

        print("lr accuracy:\t\t{:.2f} %  svm accuracy:\t\t{:.2f} %".format(100*lr_acc,100*svm_acc))
        

In [48]:
# Repeat the experiment 20 times on one RandomCNN instance; each call builds a
# new network and appends one entry to randc.lr_acc and randc.svm_acc.
randc = RandomCNN()
for i in range(20):
    randc.experiment()

lr accuracy:		95.53 %  svm accuracy:		96.17 %
lr accuracy:		95.32 %  svm accuracy:		96.04 %
lr accuracy:		95.56 %  svm accuracy:		96.20 %
lr accuracy:		96.10 %  svm accuracy:		96.85 %
lr accuracy:		95.50 %  svm accuracy:		95.94 %
lr accuracy:		95.72 %  svm accuracy:		96.30 %
lr accuracy:		94.73 %  svm accuracy:		95.73 %
lr accuracy:		95.61 %  svm accuracy:		96.21 %
lr accuracy:		95.78 %  svm accuracy:		96.27 %
lr accuracy:		95.49 %  svm accuracy:		96.22 %
lr accuracy:		95.57 %  svm accuracy:		96.10 %
lr accuracy:		94.84 %  svm accuracy:		95.97 %
lr accuracy:		95.24 %  svm accuracy:		96.00 %
lr accuracy:		95.22 %  svm accuracy:		95.94 %
lr accuracy:		95.49 %  svm accuracy:		96.17 %
lr accuracy:		95.19 %  svm accuracy:		95.85 %
lr accuracy:		95.26 %  svm accuracy:		95.97 %
lr accuracy:		95.17 %  svm accuracy:		95.76 %
lr accuracy:		95.31 %  svm accuracy:		96.03 %
lr accuracy:		94.53 %  svm accuracy:		95.27 %


In [49]:
import pickle

# Save the RandomCNN instance (its two accuracy lists) to disk. The context
# manager closes the file even if dump() raises. Pickle stores instances by
# reference to their class, so RandomCNN must be defined before loading this.
with open( "randc.p", "wb" ) as randc_file:
    pickle.dump( randc, randc_file )

In [56]:
# Single run with num_conv=128, mid_neurons=256. Both names are globals that
# RandomCNN.experiment forwards to build_cnn; randc2 is a fresh instance.
num_conv, mid_neurons = 128, 256
randc2 = RandomCNN()
randc2.experiment()

lr accuracy:		94.75 %  svm accuracy:		95.98 %


In [59]:
# Five runs with num_conv=32, mid_neurons=1024 on a fresh RandomCNN instance;
# each run builds a new network and prints one accuracy line.
num_conv, mid_neurons = 32, 1024
randc2 = RandomCNN()
for i in range(5):
    randc2.experiment()

lr accuracy:		98.03 %  svm accuracy:		98.05 %
lr accuracy:		97.77 %  svm accuracy:		97.76 %
lr accuracy:		97.79 %  svm accuracy:		97.69 %
lr accuracy:		97.91 %  svm accuracy:		98.00 %
lr accuracy:		98.14 %  svm accuracy:		98.00 %


In [58]:
# Single run with num_conv=32, mid_neurons=2048 (globals read by
# RandomCNN.experiment); randc2 is rebound to a fresh instance.
num_conv, mid_neurons = 32, 2048
randc2 = RandomCNN()
randc2.experiment()

lr accuracy:		98.47 %  svm accuracy:		98.41 %


In [67]:
# Single run with num_conv=32, mid_neurons=8192 (globals read by
# RandomCNN.experiment); randc2 is rebound to a fresh instance.
num_conv, mid_neurons = 32, 8192
randc2 = RandomCNN()
randc2.experiment()

lr accuracy:		98.63 %  svm accuracy:		98.65 %


In [55]:
# Single run with num_conv=32, mid_neurons=128 (globals read by
# RandomCNN.experiment); randc2 is rebound to a fresh instance.
num_conv, mid_neurons = 32, 128
randc2 = RandomCNN()
randc2.experiment()

lr accuracy:		91.73 %  svm accuracy:		93.78 %


In [53]:
# Single run with num_conv=128, mid_neurons=64 (globals read by
# RandomCNN.experiment); randc2 is rebound to a fresh instance.
num_conv, mid_neurons = 128, 64
randc2 = RandomCNN()
randc2.experiment()

lr accuracy:		86.77 %  svm accuracy:		89.65 %


In [54]:
# Single run with num_conv=256, mid_neurons=16 (globals read by
# RandomCNN.experiment); results go to a separate instance, randc3.
num_conv, mid_neurons = 256, 16
randc3 = RandomCNN()
randc3.experiment()

lr accuracy:		53.79 %  svm accuracy:		56.49 %


# Train only the last layer: 95.280% test accuracy

In [65]:
# Train a network from build_cnn while updating only its last two trainable
# parameters, then report loss and accuracy on the test split.
# Reads num_conv / mid_neurons from whichever cell assigned them last.
num_epochs = 500
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = build_cnn(input_var, num_conv, mid_neurons)
# Training objective: mean categorical cross-entropy of the network output.
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
# Only params[-2:] receive updates -- presumably the output layer's weights
# and bias (TODO confirm against build_cnn). All other parameters keep the
# values they had when the network was built.
updates = lasagne.updates.nesterov_momentum(
    loss, params[-2:], learning_rate=0.01, momentum=0.9)
# Evaluation expressions use deterministic=True (per Lasagne, this disables
# stochastic layers such as dropout).
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,target_var)
test_loss = test_loss.mean()
# Symbolic accuracy: mean of (argmax prediction == target).
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                    dtype=theano.config.floatX)
# train_fn applies the updates and returns the loss; val_fn only evaluates.
train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1
    # ... followed by a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    # Then we print the per-batch averages for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
# After training, we compute and print the test error.
# NOTE: from here on test_acc is rebound to a running float sum and no longer
# names the symbolic expression above; val_fn was compiled earlier, so it is
# unaffected.
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
# Mean of the per-batch accuracies, as a fraction in [0, 1].
final_acc = test_acc / test_batches
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    final_acc * 100))

Epoch 1 of 500 took 3.121s
  training loss:		1.357918
  validation loss:		0.785271
  validation accuracy:		82.74 %
Epoch 2 of 500 took 3.122s
  training loss:		0.694636
  validation loss:		0.568171
  validation accuracy:		86.77 %
Epoch 3 of 500 took 3.121s
  training loss:		0.559798
  validation loss:		0.481072
  validation accuracy:		88.43 %
Epoch 4 of 500 took 3.121s
  training loss:		0.493769
  validation loss:		0.433400
  validation accuracy:		89.31 %
Epoch 5 of 500 took 3.122s
  training loss:		0.452526
  validation loss:		0.400661
  validation accuracy:		89.80 %
Epoch 6 of 500 took 3.125s
  training loss:		0.423413
  validation loss:		0.376930
  validation accuracy:		90.20 %
Epoch 7 of 500 took 3.122s
  training loss:		0.401235
  validation loss:		0.357617
  validation accuracy:		90.65 %
Epoch 8 of 500 took 3.122s
  training loss:		0.383640
  validation loss:		0.343747
  validation accuracy:		90.80 %
Epoch 9 of 500 took 3.122s
  training loss:		0.369247
  validation loss:		0.3306

In [66]:
# Show the test accuracy from the last-layer training cell (mean of the
# per-batch accuracies, as a fraction rather than a percentage).
final_acc

0.95279999971389773