In [5]:
'''Trains a simple deep NN on the MNIST dataset.
Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger, TensorBoard, ReduceLROnPlateau, EarlyStopping
import os
from keras.layers.noise import AlphaDropout
import keras.activations
import time
import numpy as np

In [6]:
# --- Model / training hyper-parameters ---------------------------------
num_classes = 10   # size of the softmax output layer
units = 64         # width of each hidden Dense layer
batch_size = 64
epochs = 200

# --- Experiment grid ---------------------------------------------------
# Every (activation, optimizer) pair is trained `experiments` times;
# `start` offsets the run index that goes into the log file name.
activations = ['selu', 'sigmoid']
optimizers = ['Adamax', 'sgd']
experiments = 5
start = 0

In [7]:
# Load MNIST, already shuffled and split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten each image to a 784-vector, cast to float32 and scale by 1/255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# Turn the integer class labels into binary (one-hot) class matrices.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

60000 train samples
10000 test samples


In [8]:
# Train one small MLP per (activation, optimizer, repetition) combination,
# log every epoch to ./training_logs/<model_name>.csv, and print the final
# test accuracy plus an estimate of the remaining wall-clock time.
start_time = time.time()
counter = 0
total_items = len(activations) * experiments * len(optimizers)

# CSVLogger opens its target file when training starts and does not create
# the parent directory, so make sure it exists before the first fit() call.
os.makedirs('./training_logs', exist_ok=True)

# Factories instead of ready-made instances:
#  * each call returns a *fresh* layer, so the two hidden layers never share
#    one layer object (which would tie the weights of layers such as PReLU);
#  * nothing is constructed for activations that the current run does not use.
act_factories = {
    'sigmoid': lambda: Activation(keras.activations.sigmoid),
    'tanh': lambda: Activation(keras.activations.tanh),
    'relu': lambda: Activation(keras.activations.relu),
    'linear': lambda: Activation(keras.activations.linear),
    'elu': lambda: Activation(keras.activations.elu),
    'softplus': lambda: Activation(keras.activations.softplus),
    'softsign': lambda: Activation(keras.activations.softsign),
    'hard_sigmoid': lambda: Activation(keras.activations.hard_sigmoid),
    'LeakyReLU': lambda: keras.layers.advanced_activations.LeakyReLU(),
    'PReLU': lambda: keras.layers.advanced_activations.PReLU(),
    'selu': lambda: Activation(keras.activations.selu),
    # theta=0.7 as proposed in the original paper
    'ThresholdedReLU': lambda: keras.layers.advanced_activations.ThresholdedReLU(theta=0.7),
}

# Optimizers are likewise built lazily, and only after K.clear_session(),
# so that only the selected optimizer is instantiated for a given run.
opt_factories = {
    'rmsp': lambda: keras.optimizers.RMSprop(lr=0.001, decay=0.0001),
    'adam': lambda: keras.optimizers.Adam(decay=0.0001),
    'sgd': lambda: keras.optimizers.SGD(decay=0.0001),
    'Adagrad': lambda: keras.optimizers.Adagrad(decay=0.0001),
    'Adadelta': lambda: keras.optimizers.Adadelta(decay=0.0001),
    'Adamax': lambda: keras.optimizers.Adamax(decay=0.0001),
    # NOTE(review): Nadam is configured through `schedule_decay`, not `decay`;
    # confirm this is meant to be comparable with the other optimizers.
    'Nadam': lambda: keras.optimizers.Nadam(schedule_decay=0.0001),
}

for act in activations:
    for opt in optimizers:

        for i in range(experiments):
            print("Training for activation " + act + " with optimizer " + opt )

            # Start every run from an empty backend session with fresh
            # auto-generated layer names.
            K.clear_session()
            K.reset_uids()

            model_name = 'normd_decay_0.0001_' + act + "_" + opt + '_' + str(i + start) + '_' + str(units)

            # 784 -> units -> units -> num_classes, dropout after each hidden layer.
            inputs = Input(shape=(784,))
            x = Dense(units, name = 'dense_1')(inputs)
            x = act_factories[act]()(x)
            x = Dropout(0.2)(x)
            x = Dense(units, name = 'dense_2')(x)
            x = act_factories[act]()(x)
            x = Dropout(0.2)(x)
            predictions = Dense(num_classes, activation='softmax', name = 'dense_output')(x)
            model = Model(inputs=inputs, outputs=predictions)

            model.compile(loss='categorical_crossentropy',
                          optimizer=opt_factories[opt](),
                          metrics=['accuracy'])
            print('-'*30)
            print('Experiment', i)

            # append=False: a rerun overwrites the log of the same model_name.
            csv_logger = CSVLogger('./training_logs/%s.csv' % (model_name), append=False)

            history = model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                verbose=0,
                                validation_data=(x_test, y_test), callbacks=[csv_logger])

            score = model.evaluate(x_test, y_test, verbose=0)
            print('Test accuracy:', score[1])

            # Remaining-time estimate: extrapolate the average duration of the
            # runs finished so far to the whole grid, then subtract the time
            # already spent.
            t = time.time()
            time_diff = t - start_time
            counter +=1
            rem_items = total_items - counter
            total_time = round((total_items / counter) * time_diff)
            rem_time = round(total_time - time_diff)
            m, s = divmod(rem_time, 60)
            h, m = divmod(m, 60)
            d, h = divmod(h, 24)
            print('Remaining time: %d days %d hours %02d minutes %02d seconds' % (d, h, m, s))

Training for activation selu with optimizer Adamax
------------------------------
Experiment 0
Test accuracy: 0.9761
Remaining time: 0 days 3 hours 51 minutes 00 seconds
Training for activation selu with optimizer Adamax
------------------------------
Experiment 1
Test accuracy: 0.9742
Remaining time: 0 days 3 hours 42 minutes 37 seconds
Training for activation selu with optimizer Adamax
------------------------------
Experiment 2
Test accuracy: 0.9764
Remaining time: 0 days 3 hours 30 minutes 25 seconds
Training for activation selu with optimizer Adamax
------------------------------
Experiment 3
Test accuracy: 0.975
Remaining time: 0 days 3 hours 16 minutes 59 seconds
Training for activation selu with optimizer Adamax
------------------------------
Experiment 4
Test accuracy: 0.975
Remaining time: 0 days 3 hours 04 minutes 52 seconds
Training for activation selu with optimizer sgd
------------------------------
Experiment 0
Test accuracy: 0.9472
Remaining time: 0 days 2 hours 51 minu