In [4]:
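'''Trains a simple deep NN on the MNIST dataset.

This notebook sweeps learning rate, activation and optimizer, repeating each
configuration several times and logging every run to a CSV file.

NOTE: the figures below refer to a single 20-epoch run; this notebook is
configured for 200 epochs per run.
Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''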
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger, ReduceLROnPlateau
import os
from keras.layers.noise import AlphaDropout
import keras.activations

import numpy as np
from scipy import stats
import pandas as pd
import time

In [5]:
# --- Per-run training settings ---
batch_size = 64      # samples per gradient update
epochs = 200         # epochs each individual run is trained for
units = 64           # width of each hidden Dense layer
num_classes = 10     # size of the softmax output / one-hot label vectors

# --- Sweep definition: every combination of the three lists is trained ---
lrs = [0.1, 0.01, 0.002, 0.001]      # learning rates
activations = ['selu', 'sigmoid']    # hidden-layer activations
optimizers = ['Adamax', 'sgd']       # optimizer names

# --- Repetition bookkeeping ---
experiments = 5      # number of repeated runs per configuration
start = 0            # offset added to the run index in log file names

In [6]:
# Load MNIST, which arrives already split into a train and a test set.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a 784-element vector and cast it to float32
# in a single chained expression per split.
x_train = x_train.reshape(60000, 784).astype('float32')
x_test = x_test.reshape(10000, 784).astype('float32')
def standardize_sample_wise(dataset):
    """Standardize every sample to zero mean and unit standard deviation.

    Statistics are computed independently for each entry along the first
    axis, over all of that entry's remaining elements.

    Args:
        dataset: array-like whose first axis indexes the samples.

    Returns:
        A new ``np.ndarray`` with the same shape as ``dataset``. A sample
        whose values are all identical (standard deviation 0) is returned as
        all zeros instead of NaN/inf.
    """
    data = np.asarray(dataset)
    # Reduce over every axis except the sample axis; keepdims lets the
    # statistics broadcast back against ``data``.
    sample_axes = tuple(range(1, data.ndim))
    mean = data.mean(axis=sample_axes, keepdims=True)
    std = data.std(axis=sample_axes, keepdims=True)
    # Guard against division by zero for constant samples. ``std`` is a fresh
    # array, so the in-place write cannot touch the caller's data.
    std[std == 0] = 1
    return (data - mean) / std

# Standardize both splits sample by sample.
x_train, x_test = standardize_sample_wise(x_train), standardize_sample_wise(x_test)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

60000 train samples
10000 test samples


In [7]:
# Grid sweep: every (learning rate, activation, optimizer) combination is
# trained `experiments` times. Each run writes its per-epoch metrics to its own
# CSV file and prints the final test accuracy plus a remaining-time estimate.
start_time = time.time()
counter = 0
total_items = len(activations) * len(optimizers) * experiments * len(lrs)

# Lookup tables hold callables/classes rather than instances, so only the
# selected optimizer is instantiated per run and every hidden layer gets its
# own Activation layer object.
act_dict = {
    'sigmoid': keras.activations.sigmoid,
    'selu': keras.activations.selu,
}
opt_dict = {
    'sgd': keras.optimizers.SGD,
    'Adamax': keras.optimizers.Adamax,
}

# CSVLogger only opens the target file; make sure the folder exists first so
# the first run does not fail on a fresh checkout.
os.makedirs('./training_logs', exist_ok=True)

for lr in lrs:
    for act in activations:
        for opt in optimizers:
            print("Training for activation " + act + " with optimizer " + opt + " and lr = " + str(lr))
            for i in range(experiments):
                # Release the previous run's graph so memory use and graph
                # size do not grow over the whole sweep. Guarded because this
                # is only known to be needed/available on the TensorFlow backend.
                if K.backend() == 'tensorflow':
                    K.clear_session()

                model_name = 'std_fixed_' + str(lr) + "_" + act + "_" + opt + '_' + str(i + start)

                # Two hidden Dense layers, each followed by the chosen
                # activation and 20% dropout, then a softmax classifier.
                inputs = Input(shape=(784,))
                x = Dense(units, name='dense_1')(inputs)
                x = Activation(act_dict[act])(x)
                x = Dropout(0.2)(x)
                x = Dense(units, name='dense_2')(x)
                x = Activation(act_dict[act])(x)
                x = Dropout(0.2)(x)
                predictions = Dense(num_classes, activation='softmax', name='dense_output')(x)
                model = Model(inputs=inputs, outputs=predictions)

                model.compile(loss='categorical_crossentropy',
                              optimizer=opt_dict[opt](lr=lr),
                              metrics=['accuracy'])
                print('-'*30)
                print('Experiment', i)

                csv_logger = CSVLogger('./training_logs/%s_%d.csv' % (model_name, units), append=False)
                # NOTE(review): the test split doubles as validation data, so
                # the logged val_* metrics are test-set metrics.
                history = model.fit(x_train, y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    verbose=0,
                                    validation_data=(x_test, y_test),
                                    callbacks=[csv_logger])

                score = model.evaluate(x_test, y_test, verbose=0)
                print('Test accuracy:', score[1])

                # Remaining time = average duration of the finished runs
                # multiplied by the number of runs still to go.
                time_diff = time.time() - start_time
                counter += 1
                rem_items = total_items - counter
                rem_time = round(time_diff / counter * rem_items)
                m, s = divmod(rem_time, 60)
                h, m = divmod(m, 60)
                d, h = divmod(h, 24)
                print('Remaining time: %d days %d hours %02d minutes %02d seconds' % (d, h, m, s))

Training for activation selu with optimizer Adamax and lr = 0.1
------------------------------
Experiment 0
Test accuracy: 0.8379
Remaining time: 0 days 3 hours 17 minutes 28 seconds
------------------------------
Experiment 1
Test accuracy: 0.8175
Remaining time: 0 days 3 hours 07 minutes 25 seconds
------------------------------
Experiment 2
Test accuracy: 0.5662
Remaining time: 0 days 2 hours 57 minutes 58 seconds
------------------------------
Experiment 3
Test accuracy: 0.098
Remaining time: 0 days 2 hours 47 minutes 56 seconds
------------------------------
Experiment 4
Test accuracy: 0.6194
Remaining time: 0 days 2 hours 37 minutes 52 seconds
Training for activation selu with optimizer sgd and lr = 0.1
------------------------------
Experiment 0
Test accuracy: 0.9781
Remaining time: 0 days 2 hours 26 minutes 20 seconds
------------------------------
Experiment 1
Test accuracy: 0.982
Remaining time: 0 days 2 hours 15 minutes 19 seconds
------------------------------
Experiment 2


Test accuracy: 0.9816
Remaining time: -1 days 16 hours 17 minutes 06 seconds
------------------------------
Experiment 2
Test accuracy: 0.9814
Remaining time: -1 days 16 hours 05 minutes 24 seconds
------------------------------
Experiment 3
Test accuracy: 0.9822
Remaining time: -1 days 15 hours 53 minutes 34 seconds
------------------------------
Experiment 4
Test accuracy: 0.9813
Remaining time: -1 days 15 hours 41 minutes 39 seconds
Training for activation selu with optimizer sgd and lr = 0.001
------------------------------
Experiment 0
Test accuracy: 0.9708
Remaining time: -1 days 15 hours 30 minutes 19 seconds
------------------------------
Experiment 1
Test accuracy: 0.9671
Remaining time: -1 days 15 hours 18 minutes 55 seconds
------------------------------
Experiment 2
Test accuracy: 0.9691
Remaining time: -1 days 15 hours 07 minutes 24 seconds
------------------------------
Experiment 3
Test accuracy: 0.9673
Remaining time: -1 days 14 hours 55 minutes 49 seconds
-------------