In [1]:
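'''Trains small dense networks on the MNIST dataset, sweeping activation
functions, optimizers and learning rates.

NOTE: the figures below appear to be inherited from a single-model,
20-epoch script; they do not describe the sweep in this notebook, whose
epoch count is set by `epochs` in the configuration cell.

Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''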
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger, ReduceLROnPlateau
import os
from keras.layers.noise import AlphaDropout
import keras.activations

import numpy as np
from scipy import stats
import pandas as pd
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Per-model training settings -------------------------------------------
epochs = 200
batch_size = 64
units = 64          # width of each hidden Dense layer
num_classes = 10    # size of the softmax output / one-hot label vectors

# --- Sweep definition ------------------------------------------------------
# Each (lr, activation, optimizer) combination is trained `experiments` times;
# `start` offsets the run index that is embedded in the log file name.
experiments = 5
start = 0
lrs = [0.1, 0.01, 0.002, 0.001]
activations = ['selu', 'sigmoid']
optimizers = ['Adamax', 'sgd']

In [3]:
# Load MNIST, already shuffled and split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten each image to a 784-element vector, cast to float32 and divide by
# 255 so the network sees scaled float inputs instead of raw integer pixels.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# One-hot encode the integer class labels for categorical cross-entropy.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

60000 train samples
10000 test samples


In [5]:
# Sweep every (learning rate, activation, optimizer) combination, training
# `experiments` independent models for each one. Per-epoch metrics go to a CSV
# file per run; final test accuracy and an ETA are printed after every run.
start_time = time.time()
counter = 0
total_items = len(activations) * len(optimizers) * experiments * len(lrs)

# Name -> callable lookups. Layers and optimizers are instantiated per run
# (below) so nothing is built that the current run does not use.
activation_fns = {
    'sigmoid': keras.activations.sigmoid,
    'selu': keras.activations.selu,
}
optimizer_classes = {
    'sgd': keras.optimizers.SGD,
    'Adamax': keras.optimizers.Adamax,
}

# Make sure the log directory exists before the first CSVLogger tries to
# open a file inside it.
os.makedirs('./training_logs', exist_ok=True)

for lr in lrs:
    for act in activations:
        for opt in optimizers:
            print("Training for activation " + act + " with optimizer " + opt + " and lr = " + str(lr))
            for i in range(experiments):

                # Drop the previous run's graph/session state. Without this,
                # every model built in the sweep stays alive in the global
                # TensorFlow graph, so memory use and per-run overhead keep
                # growing. Guarded because only the TensorFlow backend is
                # known to provide clear_session().
                if K.backend() == 'tensorflow':
                    K.clear_session()

                model_name = 'normd_fixed_' + str(lr) + "_" + act + "_" + opt + '_' + str(i + start)

                # 784 -> units -> units -> num_classes MLP with dropout after
                # each hidden activation. Each activation gets its own layer
                # instance rather than sharing one layer across two call sites.
                inputs = Input(shape=(784,))
                x = Dense(units, name='dense_1')(inputs)
                x = Activation(activation_fns[act])(x)
                x = Dropout(0.2)(x)
                x = Dense(units, name='dense_2')(x)
                x = Activation(activation_fns[act])(x)
                x = Dropout(0.2)(x)
                predictions = Dense(num_classes, activation='softmax', name='dense_output')(x)
                model = Model(inputs=inputs, outputs=predictions)

                model.compile(loss='categorical_crossentropy',
                              optimizer=optimizer_classes[opt](lr=lr),
                              metrics=['accuracy'])
                print('-'*30)
                print('Experiment', i)

                # The test split doubles as validation data, so the logged
                # val_* columns are test-set metrics.
                csv_logger = CSVLogger('./training_logs/%s_%d.csv' % (model_name, units), append=False)
                history = model.fit(x_train, y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    verbose=0,
                                    validation_data=(x_test, y_test), callbacks=[csv_logger])

                score = model.evaluate(x_test, y_test, verbose=0)
                print('Test accuracy:', score[1])

                # ETA: extrapolate the mean wall-clock time per finished run
                # over the whole sweep, then subtract the time already spent.
                t = time.time()
                time_diff = t - start_time
                counter += 1
                total_time = round((total_items / counter) * time_diff)
                rem_time = round(total_time - time_diff)
                m, s = divmod(rem_time, 60)
                h, m = divmod(m, 60)
                d, h = divmod(h, 24)
                print('Remaining time: %d days %d hours %02d minutes %02d seconds' % (d, h, m, s))

Training for activation selu with optimizer Adamax and lr = 0.1
------------------------------
Experiment 0
Test accuracy: 0.9643
Remaining time: 0 days 10 hours 47 minutes 35 seconds
------------------------------
Experiment 1
Test accuracy: 0.9404
Remaining time: 0 days 10 hours 31 minutes 52 seconds
------------------------------
Experiment 2
Test accuracy: 0.9574
Remaining time: 0 days 10 hours 19 minutes 46 seconds
------------------------------
Experiment 3
Test accuracy: 0.9583
Remaining time: 0 days 10 hours 14 minutes 28 seconds
------------------------------
Experiment 4
Test accuracy: 0.9456
Remaining time: 0 days 10 hours 12 minutes 38 seconds
Training for activation selu with optimizer sgd and lr = 0.1
------------------------------
Experiment 0
Test accuracy: 0.98
Remaining time: 0 days 10 hours 04 minutes 39 seconds
------------------------------
Experiment 1
Test accuracy: 0.9793
Remaining time: 0 days 9 hours 56 minutes 30 seconds
------------------------------
Experim

------------------------------
Experiment 2
Test accuracy: 0.9796
Remaining time: 0 days 2 hours 55 minutes 52 seconds
------------------------------
Experiment 3
Test accuracy: 0.9789
Remaining time: 0 days 2 hours 46 minutes 43 seconds
------------------------------
Experiment 4
Test accuracy: 0.9779
Remaining time: 0 days 2 hours 37 minutes 25 seconds
Training for activation selu with optimizer sgd and lr = 0.001
------------------------------
Experiment 0
Test accuracy: 0.9381
Remaining time: 0 days 2 hours 27 minutes 51 seconds
------------------------------
Experiment 1
Test accuracy: 0.9332
Remaining time: 0 days 2 hours 18 minutes 06 seconds
------------------------------
Experiment 2
Test accuracy: 0.9383
Remaining time: 0 days 2 hours 08 minutes 11 seconds
------------------------------
Experiment 3
Test accuracy: 0.9353
Remaining time: 0 days 1 hours 58 minutes 09 seconds
------------------------------
Experiment 4
Test accuracy: 0.9383
Remaining time: 0 days 1 hours 48 minu