In [1]:
'''Trains a simple deep NN on the MNIST dataset.
Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation, GaussianDropout, GaussianNoise
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger, Callback, ReduceLROnPlateau
import os
from keras.layers.noise import AlphaDropout
import keras.activations
import numpy as np
from scipy import stats
import pandas as pd
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

num_classes = 10

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

def standardize_sample_wise(dataset):
    standardized = []
    for s in dataset:
        mean, std = s.mean(), s.std()
        s = (s - mean) / std
        standardized.append(s)
    return np.array(standardized)

x_train = standardize_sample_wise(x_train)
x_test = standardize_sample_wise(x_test)
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [3]:
batch_size = 64

epochs = 100
units = 64
experiments = 5
start = 0
activations = ['selu', 'sigmoid']
optimizers = ['Adamax', 'sgd']

configs = [
    {
        'type': 'no_dropout',
        'rate': 0.0
    },
#     This has been done in 02_optimizers
#     {
#         'type': 'dropout',
#         'rate': 0.2
#     },
    {
        'type': 'dropout',
        'rate': 0.35
    },
    {
        'type': 'dropout',
        'rate': 0.5
    },
    {
        'type': 'alpha_dropout',
        'rate': 0.2
    },
    {
        'type': 'alpha_dropout',
        'rate': 0.35
    },
    {
        'type': 'alpha_dropout',
        'rate': 0.5
    },
    {
        'type': 'gaussian_dropout',
        'rate': 0.2
    },
    {
        'type': 'gaussian_dropout',
        'rate': 0.35
    },
    {
        'type': 'gaussian_dropout',
        'rate': 0.5
    }
]

In [4]:
configs = [
    {
        'type': 'dropout',
        'rate': 0.1
    },
    {
        'type': 'dropout',
        'rate': 0.7
    },
    {
        'type': 'alpha_dropout',
        'rate': 0.1
    },
    {
        'type': 'alpha_dropout',
        'rate': 0.7
    },
    {
        'type': 'gaussian_dropout',
        'rate': 0.1
    },
    {
        'type': 'gaussian_dropout',
        'rate': 0.7
    },
    {
        'type': 'gaussian_noise',
        'rate': 0.05
    },
    {
        'type': 'gaussian_noise',
        'rate': 0.5
    },
    {
        'type': 'gaussian_noise',
        'rate': 1.0
    }
]

In [5]:
start_time = time.time()
counter = 0
total_items = (len(activations))* len(optimizers) * experiments * len(configs)

for cfg in configs:

    for act in activations:
        for opt in optimizers:
            print("Training for activation " + act + ", optimizer " + opt + " with config " + str(cfg))

            for i in range(experiments):
                K.clear_session()
                K.reset_uids()
                # Selecting activation function
                act_dict = {
                  'sigmoid': Activation(keras.activations.sigmoid),
                  'tanh': Activation(keras.activations.tanh),
                  'relu': Activation(keras.activations.relu),
                  'linear': Activation(keras.activations.linear),
                  'elu': Activation(keras.activations.elu),
                  'softplus': Activation(keras.activations.softplus),
                  'softsign': Activation(keras.activations.softsign),
                  'hard_sigmoid': Activation(keras.activations.hard_sigmoid),
                  'LeakyReLU': keras.layers.advanced_activations.LeakyReLU(),
                  'selu': Activation(keras.activations.selu),
                  'ThresholdedReLU': keras.layers.advanced_activations.ThresholdedReLU(theta=0.7) 
                }

                # Selecting optimizer
                opt_dict = {
                  'rmsp': keras.optimizers.rmsprop(lr=0.001),
                  'adam': keras.optimizers.Adam(),
                  'sgd': keras.optimizers.SGD(),
                  'Adagrad': keras.optimizers.Adagrad(),
                  'Adadelta': keras.optimizers.Adadelta(),
                  'Adamax': keras.optimizers.Adamax(),
                  'Nadam': keras.optimizers.Nadam()
                }


                model_name = 'std_%s_%0.2f_%s_%s_%d_%d' % (cfg['type'], cfg['rate'], act, opt, (i + start), units)
                inputs = Input(shape=(784,))
                x = Dense(units, name = 'dense_1')(inputs)
                x = act_dict[act](x)
                
                if cfg['type'] == 'dropout':
                    x = Dropout(cfg['rate'])(x)
                elif cfg['type'] == 'alpha_dropout':
                    x = AlphaDropout(cfg['rate'])(x)
                elif cfg['type'] == 'gaussian_dropout':
                    x = GaussianDropout(cfg['rate'])(x)
                elif cfg['type'] == 'gaussian_noise':
                    x = GaussianDropout(cfg['rate'])(x)
                    
                x = Dense(units, name = 'dense_2')(x)
                x = act_dict[act](x)
                
                if cfg['type'] == 'dropout':
                    x = Dropout(cfg['rate'])(x)
                elif cfg['type'] == 'alpha_dropout':
                    x = AlphaDropout(cfg['rate'])(x)
                elif cfg['type'] == 'gaussian_dropout':
                    x = GaussianDropout(cfg['rate'])(x)
                elif cfg['type'] == 'gaussian_noise':
                    x = GaussianDropout(cfg['rate'])(x)
    
                predictions = Dense(num_classes, activation='softmax', name = 'dense_output')(x)
                model = Model(inputs=inputs, outputs=predictions)

                model.compile(loss='categorical_crossentropy',
                                  optimizer=opt_dict[opt],
                                  metrics=['accuracy'])
                print('-'*30)
                print('Experiment', i)

                csv_logger = CSVLogger('./training_logs/%s.csv' % (model_name), append=False)
                history = model.fit(x_train, y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    verbose=0,
                                    validation_data=(x_test, y_test), callbacks=[csv_logger])

                score = model.evaluate(x_test, y_test, verbose=0)
                
                print('Test accuracy:', score[1])

                t = time.time()
                time_diff = t - start_time
                counter +=1
                rem_items = total_items - counter
                total_time = round((total_items / counter) * time_diff)
                rem_time = round(total_time - time_diff)
                m, s = divmod(rem_time, 60)
                h, m = divmod(m, 60)
                d, h = divmod(h, 24)
                print('Remaining time: %d days %d hours %02d minutes %02d seconds' % (d, h, m, s))


Training for activation selu, optimizer Adamax with config {'type': 'dropout', 'rate': 0.1}
------------------------------
Experiment 0
Test accuracy: 0.9808
Remaining time: 0 days 15 hours 15 minutes 15 seconds
------------------------------
Experiment 1
Test accuracy: 0.9816
Remaining time: 0 days 15 hours 16 minutes 55 seconds
------------------------------
Experiment 2
Test accuracy: 0.9813
Remaining time: 0 days 15 hours 23 minutes 41 seconds
------------------------------
Experiment 3
Test accuracy: 0.9803
Remaining time: 0 days 15 hours 19 minutes 05 seconds
------------------------------
Experiment 4
Test accuracy: 0.9818
Remaining time: 0 days 15 hours 13 minutes 07 seconds
Training for activation selu, optimizer sgd with config {'type': 'dropout', 'rate': 0.1}
------------------------------
Experiment 0
Test accuracy: 0.9778
Remaining time: 0 days 15 hours 01 minutes 02 seconds
------------------------------
Experiment 1
Test accuracy: 0.9786
Remaining time: 0 days 14 hours 5

------------------------------
Experiment 4
Test accuracy: 0.9008
Remaining time: 0 days 9 hours 35 minutes 07 seconds
Training for activation selu, optimizer Adamax with config {'type': 'alpha_dropout', 'rate': 0.7}
------------------------------
Experiment 0
Test accuracy: 0.9382
Remaining time: 0 days 9 hours 30 minutes 46 seconds
------------------------------
Experiment 1
Test accuracy: 0.9347
Remaining time: 0 days 9 hours 26 minutes 27 seconds
------------------------------
Experiment 2
Test accuracy: 0.9346
Remaining time: 0 days 9 hours 22 minutes 07 seconds
------------------------------
Experiment 3
Test accuracy: 0.936
Remaining time: 0 days 9 hours 17 minutes 53 seconds
------------------------------
Experiment 4
Test accuracy: 0.937
Remaining time: 0 days 9 hours 13 minutes 36 seconds
Training for activation selu, optimizer sgd with config {'type': 'alpha_dropout', 'rate': 0.7}
------------------------------
Experiment 0
Test accuracy: 0.921
Remaining time: 0 days 9 hours

------------------------------
Experiment 3
Test accuracy: 0.9131
Remaining time: 0 days 5 hours 01 minutes 54 seconds
------------------------------
Experiment 4
Test accuracy: 0.9053
Remaining time: 0 days 4 hours 56 minutes 58 seconds
Training for activation selu, optimizer Adamax with config {'type': 'gaussian_noise', 'rate': 0.05}
------------------------------
Experiment 0
Test accuracy: 0.9796
Remaining time: 0 days 4 hours 52 minutes 16 seconds
------------------------------
Experiment 1
Test accuracy: 0.98
Remaining time: 0 days 4 hours 47 minutes 33 seconds
------------------------------
Experiment 2
Test accuracy: 0.9817
Remaining time: 0 days 4 hours 42 minutes 53 seconds
------------------------------
Experiment 3
Test accuracy: 0.9794
Remaining time: 0 days 4 hours 38 minutes 13 seconds
------------------------------
Experiment 4
Test accuracy: 0.9797
Remaining time: 0 days 4 hours 33 minutes 30 seconds
Training for activation selu, optimizer sgd with config {'type': 'gau

Test accuracy: 0.9691
Remaining time: 0 days 0 hours 09 minutes 22 seconds
------------------------------
Experiment 3
Test accuracy: 0.9687
Remaining time: 0 days 0 hours 04 minutes 40 seconds
------------------------------
Experiment 4
Test accuracy: 0.9671
Remaining time: 0 days 0 hours 00 minutes 00 seconds
