In [1]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals

import os
import sys

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
# Seaborn theme first: default theme, then a white-grid background and the
# "poster" context.
sns.set()
sns.set_style("whitegrid")
sns.set_context("poster")

# Matplotlib overrides, applied after the seaborn calls so that these
# values are the ones in effect for every figure of the notebook.
mpl.rcParams.update({
    # Figure geometry and resolution.
    'figure.figsize': [8.0 * 2, 6.0 * 2],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    # Text sizes.
    'font.size': 30,
    'axes.labelsize': 30,
    'ytick.labelsize': 30,
    'xtick.labelsize': 30,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
})


# Introduction

This notebook plots the results of the training runs.

In [3]:
from problem.workflow import pprint

# Data

In [4]:
from problem.mnist import load_data

In [5]:
# Load the dataset once; X and y are reused by the model-loading, evaluation
# and plotting cells below.
# NOTE(review): presumably X holds the inputs and y the labels -- confirm
# against problem.mnist.load_data.
X, y = load_data()

# Load models

In [6]:
from problem.mnist import get_save_directory
from problem.mnist import preprocessing
from problem.mnist import skew
from problem.mnist import tangent
from problem.mnist import get_cv_iter

# from main import get_data_shape

In [7]:
def load_models(model_class, *args, **kwargs):
    """Instantiate and restore one saved model per cross-validation split.

    The number of models is the length of ``get_cv_iter(X, y)``, computed
    on the notebook-level ``X`` and ``y``. Each model is built as
    ``model_class(*args, **kwargs)`` and restored with ``model.load(path)``
    where ``path`` is ``<save directory>/<model.get_name()>-<split index>``.

    Parameters
    ----------
    model_class : class
        Model class to instantiate; instances must provide ``get_name()``
        and ``load(path)``.
    *args, **kwargs
        Forwarded unchanged to ``model_class``.

    Returns
    -------
    list
        The restored models, ordered by split index.
    """
    n_splits = len(get_cv_iter(X, y))
    pprint('Loading', n_splits, model_class.__name__)

    def _restore(split_index):
        # Build the model first: its name is part of the checkpoint path.
        restored = model_class(*args, **kwargs)
        root = get_save_directory()
        checkpoint = '{}-{}'.format(restored.get_name(), split_index)
        restored.load(os.path.join(root, checkpoint))
        return restored

    return [_restore(split_index) for split_index in range(n_splits)]

In [8]:
from models.mnist import NeuralNetModel
from models.mnist import AugmentedNeuralNetModel
from models.mnist import TangentPropModel
from models.mnist import AugmentedTangentPropModel
from models.mnist import PivotModel


# Test submission

In [9]:
from problem.mnist import test_submission


In [10]:
def get_mean_std(df_list):
    """Reduce the 'error' columns of several frames to a mean and a std curve.

    Parameters
    ----------
    df_list : sequence of pandas.DataFrame
        One frame per model. Every frame must have an 'error' column, and
        all of these columns must have the same length so that they can be
        stacked side by side. No other column is required.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        Row-wise mean and row-wise standard deviation (numpy default,
        ``ddof=0``) taken across the frames; both are 1-D with one entry
        per row of the input frames.

    Raises
    ------
    IndexError
        If ``df_list`` is empty.

    NOTE(review): the values aggregated here come from the 'error' column,
    while the notebook stores the results in ``*_mean_acc`` variables and
    labels the plot axis 'accuracy' -- confirm which quantity
    ``test_submission`` really writes into that column.
    """
    frames = list(df_list)
    if not frames:
        raise IndexError('get_mean_std() needs at least one DataFrame')
    # One column per frame: shape (n_rows, n_frames).
    stacked = np.stack([frame['error'].values for frame in frames], axis=1)
    return np.mean(stacked, axis=1), np.std(stacked, axis=1)

In [11]:
# z_list = list(range(-50, 51, 10)) + list(range(-5, 6, 1))
# z_list = sorted(z_list)
# Evaluation grid: 21 evenly spaced values from -10 to 10 (step 1).
z_list = np.linspace(start=-10, stop=10, num=21)

# Show the grid so the values used for evaluation are visible in the output.
print(z_list)

[-10.  -9.  -8.  -7.  -6.  -5.  -4.  -3.  -2.  -1.   0.   1.   2.   3.
   4.   5.   6.   7.   8.   9.  10.]


In [12]:
# Plain neural network: restore one saved model per cross-validation split.
NN_models = load_models(
    NeuralNetModel,
    n_steps=10000,
    batch_size=128,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(NN_models, X, y, z_list=z_list)
NN_mean_acc, NN_std_acc = get_mean_std(df_list)


2018-02-23 15:50:37 Loading 12 NeuralNetModel
2018-02-23 15:50:44 testing model 1/12
2018-02-23 15:51:20 testing model 2/12
2018-02-23 15:51:53 testing model 3/12
2018-02-23 15:52:27 testing model 4/12
2018-02-23 15:53:01 testing model 5/12
2018-02-23 15:53:36 testing model 6/12
2018-02-23 15:54:10 testing model 7/12
2018-02-23 15:54:45 testing model 8/12
2018-02-23 15:55:19 testing model 9/12
2018-02-23 15:55:50 testing model 10/12
2018-02-23 15:56:24 testing model 11/12
2018-02-23 15:56:58 testing model 12/12
2018-02-23 15:57:32 Done.


In [13]:
# Augmented neural network: restore one saved model per cross-validation
# split. The constructor arguments are forwarded as-is by load_models.
ANN_models = load_models(
    AugmentedNeuralNetModel,
    skewing_function=skew,
    n_augment=5,
    batch_size=128,
    n_steps=10000,
    width=5.,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(ANN_models, X, y, z_list=z_list)
ANN_mean_acc, ANN_std_acc = get_mean_std(df_list)


2018-02-23 15:57:32 Loading 12 AugmentedNeuralNetModel
2018-02-23 15:57:33 testing model 1/12
2018-02-23 15:58:07 testing model 2/12
2018-02-23 15:58:40 testing model 3/12
2018-02-23 15:59:14 testing model 4/12
2018-02-23 15:59:48 testing model 5/12
2018-02-23 16:00:21 testing model 6/12
2018-02-23 16:00:55 testing model 7/12
2018-02-23 16:01:29 testing model 8/12
2018-02-23 16:02:02 testing model 9/12
2018-02-23 16:02:36 testing model 10/12
2018-02-23 16:03:10 testing model 11/12
2018-02-23 16:03:44 testing model 12/12
2018-02-23 16:04:18 Done.


In [14]:
# Pivot model with trade_off=0.0: restore one saved model per
# cross-validation split.
PAN_L0_models = load_models(
    PivotModel,
    skewing_function=skew,
    batch_size=128,
    n_recovery_steps=5,
    n_clf_pre_training_steps=3000,
    n_adv_pre_training_steps=3000,
    n_steps=10000,
    width=5.,
    trade_off=0.0,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(PAN_L0_models, X, y, z_list=z_list)
PAN_L0_mean_acc, PAN_L0_std_acc = get_mean_std(df_list)


2018-02-23 16:04:18 Loading 12 PivotModel
2018-02-23 16:04:18 testing model 1/12
2018-02-23 16:04:52 testing model 2/12
2018-02-23 16:05:26 testing model 3/12
2018-02-23 16:05:57 testing model 4/12
2018-02-23 16:06:30 testing model 5/12
2018-02-23 16:07:02 testing model 6/12
2018-02-23 16:07:35 testing model 7/12
2018-02-23 16:08:09 testing model 8/12
2018-02-23 16:08:43 testing model 9/12
2018-02-23 16:09:18 testing model 10/12
2018-02-23 16:09:52 testing model 11/12
2018-02-23 16:10:26 testing model 12/12
2018-02-23 16:10:59 Done.


In [15]:
# Pivot model with trade_off=0.1: restore one saved model per
# cross-validation split.
PAN_L01_models = load_models(
    PivotModel,
    skewing_function=skew,
    batch_size=128,
    n_recovery_steps=5,
    n_clf_pre_training_steps=3000,
    n_adv_pre_training_steps=3000,
    n_steps=10000,
    width=5.,
    trade_off=0.1,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(PAN_L01_models, X, y, z_list=z_list)
PAN_L01_mean_acc, PAN_L01_std_acc = get_mean_std(df_list)


2018-02-23 16:10:59 Loading 12 PivotModel
2018-02-23 16:11:00 testing model 1/12
2018-02-23 16:11:34 testing model 2/12
2018-02-23 16:12:08 testing model 3/12
2018-02-23 16:12:42 testing model 4/12
2018-02-23 16:13:17 testing model 5/12
2018-02-23 16:13:51 testing model 6/12
2018-02-23 16:14:24 testing model 7/12
2018-02-23 16:14:58 testing model 8/12
2018-02-23 16:15:31 testing model 9/12
2018-02-23 16:16:03 testing model 10/12
2018-02-23 16:16:37 testing model 11/12
2018-02-23 16:17:11 testing model 12/12
2018-02-23 16:17:45 Done.


In [None]:
# Pivot model with trade_off=1.0: restore one saved model per
# cross-validation split.
PAN_L1_models = load_models(
    PivotModel,
    skewing_function=skew,
    batch_size=128,
    n_recovery_steps=5,
    n_clf_pre_training_steps=3000,
    n_adv_pre_training_steps=3000,
    n_steps=10000,
    width=5.,
    trade_off=1.0,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(PAN_L1_models, X, y, z_list=z_list)
PAN_L1_mean_acc, PAN_L1_std_acc = get_mean_std(df_list)


2018-02-23 16:17:45 Loading 12 PivotModel
2018-02-23 16:17:46 testing model 1/12
2018-02-23 16:18:20 testing model 2/12
2018-02-23 16:18:56 testing model 3/12
2018-02-23 16:19:29 testing model 4/12
2018-02-23 16:20:02 testing model 5/12
2018-02-23 16:20:36 testing model 6/12
2018-02-23 16:21:09 testing model 7/12
2018-02-23 16:21:41 testing model 8/12
2018-02-23 16:22:13 testing model 9/12
2018-02-23 16:22:46 testing model 10/12
2018-02-23 16:23:21 testing model 11/12
2018-02-23 16:23:54 testing model 12/12
2018-02-23 16:24:29 Done.


In [None]:
# Pivot model with trade_off=10.0: restore one saved model per
# cross-validation split.
PAN_L10_models = load_models(
    PivotModel,
    skewing_function=skew,
    batch_size=128,
    n_recovery_steps=5,
    n_clf_pre_training_steps=3000,
    n_adv_pre_training_steps=3000,
    n_steps=10000,
    width=5.,
    trade_off=10.0,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(PAN_L10_models, X, y, z_list=z_list)
PAN_L10_mean_acc, PAN_L10_std_acc = get_mean_std(df_list)


2018-02-23 16:24:29 Loading 12 PivotModel
2018-02-23 16:24:29 testing model 1/12


In [None]:
# Pivot model with trade_off=100.0: restore one saved model per
# cross-validation split.
PAN_L100_models = load_models(
    PivotModel,
    skewing_function=skew,
    batch_size=128,
    n_recovery_steps=5,
    n_clf_pre_training_steps=3000,
    n_adv_pre_training_steps=3000,
    n_steps=10000,
    width=5.,
    trade_off=100.0,
    cuda=True,
)
# Score the restored models over z_list, then reduce the per-model frames to
# a mean and a std curve.
df_list = test_submission(PAN_L100_models, X, y, z_list=z_list)
PAN_L100_mean_acc, PAN_L100_std_acc = get_mean_std(df_list)


# Plot


- **TODO**: what about confusion matrices?


In [None]:
z = z_list

plt.figure(figsize=(8 * 2, 6 * 2))

# (legend label, mean curve) pairs in drawing order. The order decides which
# colour of the cycle each curve receives. The numeric labels are the
# trade_off values of the pivot models. The trade_off=100 curve and the
# +/- std bands are deliberately not drawn here.
# NOTE(review): the y axis is labelled 'accuracy' but the curves are built
# from the 'error' column in get_mean_std -- confirm which one is plotted.
curves = [
    ('NNP', ANN_mean_acc),
    ('NN', NN_mean_acc),
    ('0.0', PAN_L0_mean_acc),
    ('0.1', PAN_L01_mean_acc),
    ('1', PAN_L1_mean_acc),
    ('10', PAN_L10_mean_acc),
]
for curve_label, curve_mean in curves:
    plt.plot(z, curve_mean, 'o-', label=curve_label)

plt.xlabel('angle')
plt.ylabel('accuracy')
plt.legend()
plt.show()


In [None]:
# plt.figure(figsize=(15,8))

# Overlay the distributions of zero-mean normal samples for three standard
# deviations, drawing as many values as there are rows in X each time.
# NOTE(review): the generator is not seeded, so the figure differs slightly
# from one run to the next.
for sigma in (5, 10, 15):
    zz = np.random.normal(loc=0, scale=sigma, size=(X.shape[0]))
    sns.distplot(zz, label='{}'.format(sigma))

plt.xlabel('angle')
plt.legend()
plt.show()

## Losses

In [None]:
# Index of the restored model (cross-validation split) whose recorded losses
# are plotted.
i = 0

# Only every 50th recorded loss value is drawn to keep the curves readable.
plt.plot(NN_models[i].loss_hook.losses[::50], label='NN')
plt.plot(ANN_models[i].loss_hook.losses[::50], label='ANN')
# The pivot models are bound to PAN_L<trade_off>_models by the loading cells
# of this notebook; those are the only names that exist after a fresh
# "Restart & Run All". Their losses are read from `dloss_hook`.
plt.plot(PAN_L0_models[i].dloss_hook.losses[::50], label='0.0')
plt.plot(PAN_L01_models[i].dloss_hook.losses[::50], label='0.1')
plt.plot(PAN_L1_models[i].dloss_hook.losses[::50], label='1')
plt.plot(PAN_L10_models[i].dloss_hook.losses[::50], label='10')

plt.title('Cross entropy')
plt.legend()
plt.show()

# Playground

In [None]:
# i = 0
# m = models[i]
# cv_iter = get_cv_iter(X, y)
# idx_dev, idx_valid = cv_iter[i]
# X_test = X[idx_valid]
# y_test = y[idx_valid]
# acc = accuracy_score(y_test, m.predict(X_test))

# print(acc)


In [None]:
# XX = skew(X_test, z=80)
# y_pred = m.predict(XX)
# acc = accuracy_score(y_test, m.predict(XX))
# print(acc)

# i = np.random.randint(0, XX.shape[0])
# plt.imshow(XX[i].reshape(28,28))

# plt.title('label = {}<->truth = {}'.format(y_pred[i], y_test[i]))
# plt.show()

# plt.imshow(X_test[i].reshape(28,28))
# plt.show()


In [None]:
# X_test.shape

In [None]:
# from myNNDA import data_augment

In [None]:
# XX = X[:200]
# yy = y[:200]
# XX2, yy, _ = data_augment(XX, yy, None)

In [None]:
# i = 67
# x = XX2[i].reshape(28, 28)
# plt.imshow(x, cmap='Greys')
# plt.title('skewed')
# plt.show()
# x = XX[i].reshape(28, 28)
# plt.imshow(x, cmap='Greys')
# plt.title('original')
# plt.show()