# Experiments 5: CNN on MNIST

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

from data import Datafile, load_data
from influence.emp_risk_optimizer import EmpiricalRiskOptimizer
from influence.plot_utils import compare_with_loo, show_graph
from influence.closed_forms import I_loss_RidgeCf
from models.neural_nets import ConvNet

In [3]:
X_train, X_test, y_train, y_test, test_indices = load_data(
    Datafile.BinaryMNIST17, test_config=10)
n_tr, p = X_train.shape
n_te, _ = X_test.shape
y_train_onehot = np.eye(2)[y_train.reshape(-1)]
y_test_onehot = np.eye(2)[y_test.reshape(-1)]
print(n_tr, p)

X_train shape: (9075, 784)
y_train shape: (9075, 1)
X_test shape: (10, 784)
y_test shape: (10, 1)
9075 784


In [4]:
init_eta = 0.01
batch_size = 1000
train_iter = 100
traceback_checkpoint = 800
loo_extra_iter = 200
decay_epochs = (10000, 20000)
checkpoint_iter = traceback_checkpoint - 1
iter_to_switch_off_minibatch = np.inf
iter_to_switch_to_sgd = np.inf
# LOO a on random set of training indices, otherwise too slow
leave_indices = np.random.choice(n_tr, size=150, replace=False)

if hasattr(test_indices, '__iter__') and hasattr(leave_indices, '__iter__'):
    assert not set(test_indices) & set(leave_indices)
    print(test_indices)
    print(leave_indices)

## Configure and Fit CNN

In [5]:
model = ConvNet(
    model_name='CNN-MNIST',
    init_eta=init_eta,
    decay_epochs=decay_epochs,
    batch_size=batch_size,
    input_side=28,
    n_channels=1,
    filter_size1=5,
    n_filters1=10,
    filter_size2=5,
    n_filters2=10,
    fc_size=24,
    down_sample=2
)

In [6]:
tf.reset_default_graph()
model.fit(
    X_train, y_train_onehot,
    n_iter=train_iter,
    verbose=1000,
    iter_to_switch_off_minibatch=iter_to_switch_off_minibatch,
    iter_to_switch_to_sgd=iter_to_switch_to_sgd,
    traceback_checkpoint=traceback_checkpoint,
    show_eval=False
)

Step 0, Epoch 0: loss = 76765.40625000 (1.008 sec)
Step 1, Epoch 0: loss = 34126.02734375 (0.901 sec)
Step 2, Epoch 0: loss = 15501.31054688 (0.711 sec)
Step 3, Epoch 0: loss = 7390.31445312 (1.172 sec)
Step 4, Epoch 0: loss = 4149.62841797 (0.989 sec)
Step 5, Epoch 0: loss = 2094.86938477 (1.041 sec)
Step 6, Epoch 0: loss = 2753.43408203 (0.869 sec)
Step 7, Epoch 0: loss = 1865.51843262 (0.985 sec)
Step 8, Epoch 0: loss = 1918.19470215 (0.924 sec)
Step 9, Epoch 0: loss = 865.78460693 (0.954 sec)
Step 10, Epoch 1: loss = 732.99975586 (0.945 sec)
Step 11, Epoch 1: loss = 749.20526123 (0.948 sec)
Step 12, Epoch 1: loss = 980.15704346 (1.227 sec)
Step 13, Epoch 1: loss = 1026.44531250 (0.741 sec)
Step 14, Epoch 1: loss = 1046.59777832 (0.714 sec)
Step 15, Epoch 1: loss = 121.00914001 (0.699 sec)
Step 16, Epoch 1: loss = 825.73211670 (0.729 sec)
Step 17, Epoch 1: loss = 288.76794434 (0.717 sec)
Step 18, Epoch 1: loss = 391.73974609 (0.722 sec)
Step 19, Epoch 2: loss = 445.88302612 (0.716 s

CNN-MNIST(init_eta=0.01,batch_size=1000,decay_epochs=(10000, 20000),filter_size1=5,n_filters1=10,filter_size2=5,n_filters2=10,fc_size=24)

In [7]:
print("Train accuracy:", np.sum(
    model.predict(X_train).reshape(n_tr,1) == y_train)/n_tr)
print("Test accuracy:", np.sum(
    model.predict(X_test).reshape(n_te,1) == y_test)/n_te)

Train accuracy: 0.9963636363636363
Test accuracy: 0.9


In [8]:
show_graph(tf.get_default_graph())

In [8]:
model.get_eval(items=['params'])

{'W_conv1': array([[[[-0.72417206, -1.1240464 , -0.13842079, -0.688564  ,
            0.82220465, -0.79050654,  0.23417321, -0.28049374,
            0.590899  , -0.16155699]],
 
         [[ 1.201862  ,  0.6207388 ,  0.04059073, -1.6264727 ,
            1.3060349 , -0.48396948, -0.05822701, -1.2270124 ,
            0.5916745 ,  1.2019327 ]],
 
         [[ 0.24650642, -1.1463534 , -0.79725474, -0.16483852,
           -1.8171767 ,  1.493135  ,  0.06181991,  0.02110202,
           -0.7355042 , -0.769212  ]],
 
         [[-0.55052567,  0.66153234, -1.7484246 , -1.416028  ,
            0.4263908 , -0.7536629 ,  0.7945798 ,  0.14405434,
           -0.28679988, -1.5425204 ]],
 
         [[-0.68903404,  0.2733832 , -1.064043  , -0.41670704,
           -0.48764002, -1.3895894 ,  1.0992223 ,  1.0268795 ,
            0.70539665, -1.4115741 ]]],
 
 
        [[[ 0.46435028,  1.2497951 , -0.44337797, -1.6932778 ,
            0.45348525, -1.6680089 ,  0.43014035,  0.84859943,
            0.8137065 ,  

In [9]:
model.n_params

14604

In [None]:
I_loss_bf = model.influence_loss(
    X_test, y_test_onehot,
    leave_indices=leave_indices,
    method='brute-force',
    damping=0.01
)

In [None]:
I_loss_cg = model.influence_loss(
    X_test, y_test_onehot,
    leave_indices=leave_indices,
    method='cg',
    damping=0.01,
    tol=1e-4,
    max_iter=100
)

Fetch training loss gradients (0.913 sec)
CG Objective: 0.0
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 1
         Function evaluations: 2
         Gradient evaluations: 2
         Hessian evaluations: 0
