In [2]:
'''Trains a simple deep NN on the MNIST dataset.
Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.callbacks import CSVLogger, TensorBoard, ReduceLROnPlateau, EarlyStopping
import os
from keras.layers.noise import AlphaDropout
import keras.activations

In [3]:
# --- Training hyper-parameters -------------------------------------------
num_classes = 10   # number of target classes for the one-hot labels
batch_size = 64
epochs = 30
units = 64         # only appears as a suffix in the CSV log file names

# --- Experiment sweep ----------------------------------------------------
experiments = 1    # how many times each (activation, optimizer) pair is run
start = 0          # offset added to the run index when naming a model

# Names understood by the training loop. The commented lists are the full
# sweeps; the active lists hold the subset to run.
# activations = ['sigmoid', 'tanh', 'relu', 'linear', 'elu', 'softplus', 'softsign', 'hard_sigmoid', 'LeakyReLU', 'ThresholdedReLU']
# PReLU is not used, since it does not currently support variable inputs
activations = ['hard_sigmoid']
# optimizers = ['rmsp', 'adam', 'sgd', 'Adagrad', 'Adadelta', 'Adamax', 'Nadam']
optimizers = ['Nadam']

# --- Output location -----------------------------------------------------
# Model files are addressed relative to the current working directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')


In [4]:
img_rows, img_cols = 28, 28

# Load the train/test split shipped with the dataset loader.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The single grey-scale channel goes first or last, depending on the
# image data format the backend reports.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the channel axis, cast to float32 and divide the pixel values by 255.
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32') / 255
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


## Train a model for each activation function

In [None]:
# Train one CNN per (activation, optimizer, experiment) combination, log each
# run to ./logs/<model_name>_<units>.csv and print the final test loss/accuracy.
#
# Activations and optimizers are looked up through zero-argument factories
# rather than pre-built objects, so that:
#   * every position in the network gets its own activation layer instance
#     (the same instance is no longer added three times to one model, nor
#     reused by the models of later experiments);
#   * every experiment starts with a fresh optimizer, so learning-rate
#     reductions applied by ReduceLROnPlateau in one run do not carry over
#     into the next run;
#   * an unknown name fails loudly instead of silently reusing whatever
#     object was left over from the previous loop iteration.
activation_factories = {
    'sigmoid': lambda: Activation(keras.activations.sigmoid),
    'tanh': lambda: Activation(keras.activations.tanh),
    'relu': lambda: Activation(keras.activations.relu),
    'linear': lambda: Activation(keras.activations.linear),
    'elu': lambda: Activation(keras.activations.elu),
    'softplus': lambda: Activation(keras.activations.softplus),
    'softsign': lambda: Activation(keras.activations.softsign),
    'hard_sigmoid': lambda: Activation(keras.activations.hard_sigmoid),
    'LeakyReLU': lambda: keras.layers.advanced_activations.LeakyReLU(),
    'PReLU': lambda: keras.layers.advanced_activations.PReLU(),
    'ThresholdedReLU': lambda: keras.layers.advanced_activations.ThresholdedReLU(theta=0.7),
}

optimizer_factories = {
    'rmsp': lambda: keras.optimizers.rmsprop(),
    'adam': lambda: keras.optimizers.Adam(),
    'sgd': lambda: keras.optimizers.SGD(),
    'Adagrad': lambda: keras.optimizers.Adagrad(),
    'Adadelta': lambda: keras.optimizers.Adadelta(),
    'Adamax': lambda: keras.optimizers.Adamax(),
    'Nadam': lambda: keras.optimizers.Nadam(),
}

# CSVLogger writes into ./logs; make sure the directory is there before training.
log_dir = './logs'
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

for act in activations:
    if act not in activation_factories:
        raise ValueError("Unknown activation '%s'; expected one of %s"
                         % (act, sorted(activation_factories)))
    make_activation = activation_factories[act]

    for opt in optimizers:
        if opt not in optimizer_factories:
            raise ValueError("Unknown optimizer '%s'; expected one of %s"
                             % (opt, sorted(optimizer_factories)))
        make_optimizer = optimizer_factories[opt]

        print("\nTraining for activation " + act + " with optimizer " + opt)

        for i in range(experiments):
            model_name = 'mnist_cnn_' + act + "_" + opt + '_' + str(i + start)

            # conv -> act -> conv -> act -> pool -> dropout -> dense -> act -> dropout -> softmax
            model = Sequential()
            model.add(Conv2D(32, kernel_size=(3, 3),
                             input_shape=input_shape))
            model.add(make_activation())
            model.add(Conv2D(64, kernel_size=(3, 3)))
            model.add(make_activation())
            model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding='same'))
            model.add(Dropout(0.25))
            model.add(Flatten())
            model.add(Dense(128))
            model.add(make_activation())
            model.add(Dropout(0.5))
            model.add(Dense(num_classes, activation='softmax'))

            # Fresh optimizer per experiment (see the note at the top of the cell).
            optimizer = make_optimizer()
            model.compile(loss='categorical_crossentropy',
                              optimizer=optimizer,
                              metrics=['accuracy'])

            print('-'*30)
            print('Experiment', i)

            csv_logger = CSVLogger('./logs/%s_%d.csv' % (model_name, units), append=False, separator=';')
            reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=3, verbose=1, mode='auto', epsilon=0.0001, cooldown=2, min_lr=0)
#             tb = TensorBoard(log_dir='./tb_logs/' + model_name + '_' + str(units), histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
            # NOTE: the test split doubles as the validation set here, so the
            # learning-rate schedule is driven by test-set accuracy.
            history = model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                verbose=1,
                                validation_data=(x_test, y_test), callbacks=[csv_logger, reduce_lr])

            # Saving is currently switched off; the directory and target path
            # are still prepared so re-enabling model.save() is a one-line change.
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)
            model_path = os.path.join(save_dir, model_name + ".h5")
#             model.save(model_path)

            score = model.evaluate(x_test, y_test, verbose=0)
            print('Test loss:', score[0])
            print('Test accuracy:', score[1])


Training for activation hard_sigmoid with optimizer Nadam
------------------------------
Experiment 0
Train on 60000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 00007: reducing learning rate to 0.0004000000189989805.
Epoch 8/30
Epoch 9/30
Epoch 10/30
13056/60000 [=====>........................] - ETA: 4:19 - loss: 2.3041 - acc: 0.1045

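# SeLU
SELU has special requirements, so it is trained in its own cell: the layers use the `lecun_normal` kernel initializer, and `AlphaDropout` replaces the regular `Dropout` layers.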

In [10]:
# Train the SELU variant of the CNN once per (optimizer, experiment) pair,
# log each run to ./logs/<model_name>_<units>.csv and print the final test
# loss/accuracy.
#
# Optimizers are resolved through a lookup of zero-argument factories so an
# unknown name raises immediately instead of silently reusing an `optimizer`
# object left over from an earlier iteration or cell.
optimizer_factories = {
    'rmsp': lambda: keras.optimizers.rmsprop(),
    'adam': lambda: keras.optimizers.Adam(),
    'sgd': lambda: keras.optimizers.SGD(),
    'Adagrad': lambda: keras.optimizers.Adagrad(),
    'Adadelta': lambda: keras.optimizers.Adadelta(),
    'Adamax': lambda: keras.optimizers.Adamax(),
    'Nadam': lambda: keras.optimizers.Nadam(),
}

# CSVLogger writes into ./logs; make sure the directory is there before training.
log_dir = './logs'
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

for opt in optimizers:
    if opt not in optimizer_factories:
        raise ValueError("Unknown optimizer '%s'; expected one of %s"
                         % (opt, sorted(optimizer_factories)))

    print("Training for activation SeLU with optimizer " + opt)
    for i in range(experiments):
        model_name = 'mnist_cnn_selu_' + opt + '_' + str(i + start)

        # Same layout as the generic network, but every weighted hidden layer
        # uses selu + lecun_normal and the dropout layers are AlphaDropout.
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3),
                         activation='selu', kernel_initializer='lecun_normal',
                         input_shape=input_shape))
        model.add(Conv2D(64, (3, 3), activation='selu', kernel_initializer='lecun_normal'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(AlphaDropout(0.2))
        model.add(Flatten())
        model.add(Dense(128, activation='selu', kernel_initializer='lecun_normal'))
        model.add(AlphaDropout(0.2))
        model.add(Dense(num_classes, activation='softmax'))

        # A new optimizer instance for every experiment.
        optimizer = optimizer_factories[opt]()

        model.compile(loss='categorical_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])
        print('-'*30)
        print('Experiment', i+1)

        csv_logger = CSVLogger('./logs/%s_%d.csv' % (model_name, units), append=False, separator=';')
        reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=3, verbose=0, mode='auto', epsilon=0.0001, cooldown=2, min_lr=0)
#         tb = TensorBoard(log_dir='./tb_logs/' + model_name + '_' + str(units), histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
        # NOTE: the test split doubles as the validation set here, so the
        # learning-rate schedule is driven by test-set accuracy.
        history = model.fit(x_train, y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=1,
                            validation_data=(x_test, y_test), callbacks=[csv_logger, reduce_lr])

        # Saving is currently switched off; the directory and target path are
        # still prepared so re-enabling model.save() is a one-line change.
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        model_path = os.path.join(save_dir, model_name + ".h5")
#         model.save(model_path)
        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

Training for activation SeLU with optimizer rmsp
------------------------------
Experiment 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.0904386542887
Test accuracy: 0.9911
Training for activation SeLU with optimizer adam
------------------------------
Experiment 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/

Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.0479766140209
Test accuracy: 0.9899
Training for activation SeLU with optimizer Adagrad
------------------------------
Experiment 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/5

Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.060523664322
Test accuracy: 0.9912
Training for activation SeLU with optimizer Adamax
------------------------------
Experiment 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50


Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.0816728962701
Test accuracy: 0.9912
