In [6]:
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger
import os
import keras.activations
import time
import numpy as np

In [7]:
# Load the MNIST train/test split that ships with Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

num_classes = 10

# Flatten every image into a single float32 feature vector. The sample count
# and feature size are taken from the loaded arrays instead of being
# hard-coded, so the cell keeps working on a subset or a differently sized
# dataset.
x_train = x_train.reshape(x_train.shape[0], -1).astype('float32')
x_test = x_test.reshape(x_test.shape[0], -1).astype('float32')

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

60000 train samples
10000 test samples


In [8]:
def standardize_sample_wise(dataset):
    """Standardize every sample to zero mean and unit standard deviation.

    The statistics are computed independently for each sample (each entry
    along the first axis), over all of that sample's values.

    Args:
        dataset: array-like whose first axis indexes the samples.

    Returns:
        np.ndarray with the same shape as ``dataset`` in which each sample has
        been shifted by its own mean and divided by its own standard
        deviation. A constant sample (standard deviation of zero) is returned
        as all zeros instead of NaN.

    Raises:
        TypeError: if ``dataset`` is a scalar (there is no sample axis).
    """
    data = np.asarray(dataset)
    if data.ndim == 0:
        raise TypeError('dataset must have a leading sample axis')

    # Reduce over every axis except the first so each sample gets its own
    # statistics; keepdims lets the results broadcast back over the sample.
    sample_axes = tuple(range(1, data.ndim))
    mean = data.mean(axis=sample_axes, keepdims=True)
    std = data.std(axis=sample_axes, keepdims=True)

    # A constant sample has std == 0; dividing by it would yield NaN (0 / 0).
    # The centred values are already all zero, so dividing by 1 is safe.
    std[std == 0] = 1.0

    return (data - mean) / std

# Replace both feature matrices with their per-sample standardized versions.
x_train, x_test = map(standardize_sample_wise, (x_train, x_test))

In [9]:
# Training hyper-parameters shared by every run.
batch_size, epochs, units = 64, 100, 64

# `experiments` is the number of repeated runs per configuration; `start` is
# the offset added to the run index when the log file name is built.
experiments, start = 5, 0

# Each configuration pairs an activation name ('act') with the value ('val')
# handed to that activation layer (alpha for elu / LeakyReLU, theta for
# ThresholdedReLU). Other names the training cell recognises: sigmoid, tanh,
# relu, linear, softplus, softsign, hard_sigmoid, selu.
configs = [
    {'act': act_name, 'val': act_value}
    for act_name, act_value in (
        ('elu', 0.5),
        ('elu', 1.5),
        ('LeakyReLU', 0.01),
        ('LeakyReLU', 1.5),
        ('ThresholdedReLU', 1.0),
        ('ThresholdedReLU', 0.0),
    )
]

In [10]:
def _make_activation(name, value):
    """Build a fresh activation layer for one position in the network.

    Args:
        name: activation name, as used in the 'act' field of a config.
        value: the 'val' field of the config; used as ``alpha`` for 'elu' and
            'LeakyReLU' and as ``theta`` for 'ThresholdedReLU'. It is ignored
            by every other activation.

    Returns:
        A new Keras layer instance. Only the requested layer is constructed,
        so ``value`` is never passed to a layer that was not asked for.

    Raises:
        KeyError: if ``name`` is not a supported activation.
    """
    # Activations that take a parameter; built lazily through the lambdas.
    # All three are reached through keras.layers, like ELU already was.
    parametric = {
        'elu': lambda: keras.layers.ELU(alpha=value),
        'LeakyReLU': lambda: keras.layers.LeakyReLU(alpha=value),
        'ThresholdedReLU': lambda: keras.layers.ThresholdedReLU(theta=value),
    }
    if name in parametric:
        return parametric[name]()

    # Parameter-free activations looked up by name in keras.activations.
    plain = ('sigmoid', 'tanh', 'relu', 'linear', 'softplus', 'softsign',
             'hard_sigmoid', 'selu')
    if name in plain:
        return Activation(getattr(keras.activations, name))

    raise KeyError(name)


def _format_remaining(seconds):
    """Render a whole number of seconds as the 'Remaining time' status line."""
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    return 'Remaining time: %d days %d hours %02d minutes %02d seconds' % (d, h, m, s)


# One CSV log is written per run into this directory; create it up front so a
# fresh checkout does not fail on the first run because the folder is missing.
log_dir = './training_logs'
os.makedirs(log_dir, exist_ok=True)

start_time = time.time()
counter = 0
total_items = len(configs) * experiments

for cfg in configs:

    for i in range(experiments):

        print("Training for activation %s, %s with RMSProp optimizer, experiment %d" % (cfg['act'], str(cfg['val']), i))

        # Drop the previous run's graph and layer-name counters so every run
        # starts from a clean backend state.
        K.clear_session()
        K.reset_uids()

        model_name = 'std_sample_wise_' + cfg['act'] + '_' + str(cfg['val']) + '_rmsp_' + str(i + start) + '_' + str(units)

        # Two hidden Dense blocks (Dense -> activation -> Dropout) followed by
        # a softmax classifier. Each block gets its own activation layer
        # instance rather than sharing a single one.
        inputs = Input(shape=(x_train.shape[1],))
        x = Dense(units)(inputs)
        x = _make_activation(cfg['act'], cfg['val'])(x)
        x = Dropout(0.2)(x)
        x = Dense(units)(x)
        x = _make_activation(cfg['act'], cfg['val'])(x)
        x = Dropout(0.2)(x)
        predictions = Dense(num_classes, activation='softmax')(x)
        model = Model(inputs=inputs, outputs=predictions)

        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(lr=0.001),
                      metrics=['accuracy'])

        csv_logger = CSVLogger('%s/%s.csv' % (log_dir, model_name), append=False)
        # NOTE(review): the test split doubles as validation data here, so the
        # logged validation metrics are test-set metrics.
        history = model.fit(x_train, y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=0,
                            validation_data=(x_test, y_test), callbacks=[csv_logger])

        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        # Estimate the remaining wall-clock time by extrapolating the average
        # duration of the runs completed so far to the total number of runs.
        time_diff = time.time() - start_time
        counter += 1
        total_time = round((total_items / counter) * time_diff)
        rem_time = round(total_time - time_diff)
        print(_format_remaining(rem_time))

Training for activation elu, 0.5 with RMSProp optimizer, experiment 0
Test loss: 0.1709163382929717
Test accuracy: 0.9765
Remaining time: 0 days 2 hours 35 minutes 23 seconds
Training for activation elu, 0.5 with RMSProp optimizer, experiment 1
Test loss: 0.15889077323871434
Test accuracy: 0.9775
Remaining time: 0 days 2 hours 26 minutes 55 seconds
Training for activation elu, 0.5 with RMSProp optimizer, experiment 2
Test loss: 0.16889000464188775
Test accuracy: 0.9762
Remaining time: 0 days 2 hours 20 minutes 02 seconds
Training for activation elu, 0.5 with RMSProp optimizer, experiment 3
Test loss: 0.1920109404593904
Test accuracy: 0.9723
Remaining time: 0 days 2 hours 14 minutes 21 seconds
Training for activation elu, 0.5 with RMSProp optimizer, experiment 4
Test loss: 0.20258266432400124
Test accuracy: 0.9728
Remaining time: 0 days 2 hours 12 minutes 25 seconds
Training for activation elu, 1.5 with RMSProp optimizer, experiment 0
Test loss: 0.12191317524476163
Test accuracy: 0.9767