In [1]:
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import CSVLogger
import os
import keras.activations
import time
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Fetch the MNIST splits as (inputs, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

num_classes = 10

# Flatten every image into a 784-long vector and switch to float32 in one go.
x_train = x_train.reshape(60000, 784).astype('float32')
x_test = x_test.reshape(10000, 784).astype('float32')

print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# Turn the integer class vectors into binary class matrices with
# `num_classes` columns.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

def standardize_sample_wise(dataset):
    """Standardize every sample to zero mean and unit standard deviation.

    The statistics are computed per sample (over all axes except the first),
    not per feature across the dataset.

    Parameters
    ----------
    dataset : array-like
        Samples stacked along the first axis, e.g. shape (n_samples, n_features).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `dataset`, where each sample has had its
        own mean subtracted and been divided by its own standard deviation.
        Samples whose values are all equal (standard deviation 0) come back as
        all zeros instead of NaN/inf.
    """
    data = np.asarray(dataset)
    # Reduce over every axis except the sample axis; keepdims lets the
    # statistics broadcast back against `data`.
    sample_axes = tuple(range(1, data.ndim))
    mean = data.mean(axis=sample_axes, keepdims=True)
    std = data.std(axis=sample_axes, keepdims=True)
    # A constant sample has std == 0; dividing by it would produce NaN.
    # Dividing by 1 instead leaves the (already zero) centered values as zeros.
    std[std == 0] = 1
    return (data - mean) / std

# Replace both splits with their per-sample standardized versions
# (train first, then test).
x_train, x_test = (standardize_sample_wise(split) for split in (x_train, x_test))

60000 train samples
10000 test samples


In [12]:
# --- Sweep settings ---------------------------------------------------------
# Activations trained by the sweep. Other keys the training cell understands:
# 'sigmoid', 'tanh', 'relu', 'linear', 'elu', 'selu', 'softplus', 'softsign',
# 'hard_sigmoid', 'LeakyReLU'.
activations = ['ThresholdedReLU']
experiments = 5   # repeated runs per (config, activation) pair
start = 0         # offset added to the run index when naming the log files

# --- Training settings ------------------------------------------------------
units = 64        # width of each hidden Dense layer
epochs = 100
batch_size = 64

In [13]:
# Optimizer configurations to sweep over. Each dict is consumed by the training
# cell, which dispatches on 'optimizer' and reads the remaining keys.
#
# Currently disabled alternatives (decay is 0.0 wherever a 'decay' key is used):
#   sgd       momentum 0.1 / 0.9 / 0.95 / 0.99 / 0.999 with nesterov False;
#             momentum 0.0 / 0.1 / 0.9 / 0.95 / 0.99 / 0.999 with nesterov True
#   rmsp      rho 0.5 / 0.95 / 0.99 / 0.999 / 0.9999
#   Adadelta  rho 0.5 / 0.9 / 0.99 / 0.999 / 0.9999
#   adam      amsgrad False and True, each with (beta_1, beta_2) in
#             (0.9, 0.999), (0.9, 0.9999), (0.95, 0.999), (0.95, 0.9999)
#   Adamax    (beta_1, beta_2) in (0.9, 0.9), (0.9, 0.95), (0.9, 0.99),
#             (0.95, 0.95), (0.95, 0.99), (0.95, 0.999), (0.99, 0.999),
#             (0.99, 0.9999)
#   Nadam     schedule_decay 0.004 with (beta_1, beta_2) in (0.9, 0.9),
#             (0.9, 0.99), (0.95, 0.9), (0.95, 0.95), (0.95, 0.99), (0.95, 0.999)
configs = [
    dict(optimizer='Nadam', beta_1=0.9, beta_2=0.95, schedule_decay=0.004),
]

In [14]:
# Directory that receives one CSV training log per run.
LOG_DIR = './training_logs/custom'

# Zero-argument factories: only the requested activation layer is built, and
# every hidden block gets its own layer instance.
_ACTIVATION_FACTORIES = {
    'sigmoid': lambda: Activation(keras.activations.sigmoid),
    'tanh': lambda: Activation(keras.activations.tanh),
    'relu': lambda: Activation(keras.activations.relu),
    'linear': lambda: Activation(keras.activations.linear),
    'elu': lambda: Activation(keras.activations.elu),
    'softplus': lambda: Activation(keras.activations.softplus),
    'softsign': lambda: Activation(keras.activations.softsign),
    'hard_sigmoid': lambda: Activation(keras.activations.hard_sigmoid),
    'LeakyReLU': lambda: keras.layers.advanced_activations.LeakyReLU(),
    'selu': lambda: Activation(keras.activations.selu),
    'ThresholdedReLU': lambda: keras.layers.advanced_activations.ThresholdedReLU(theta=0.7),
}


def _build_optimizer(cfg):
    """Create the optimizer described by `cfg`.

    Returns a pair `(optimizer, name_fields)`, where `name_fields` is the list
    of stringified hyper-parameters that go into the log file name, in order.

    Raises ValueError for an unrecognized `cfg['optimizer']`, so that a typo
    cannot silently reuse the optimizer and file name of the previous run.
    """
    kind = cfg['optimizer']

    if kind == 'sgd':
        optimizer = keras.optimizers.SGD(momentum=cfg['momentum'], decay=cfg['decay'],
                                         nesterov=cfg['nesterov'])
        name_fields = [cfg['momentum'], cfg['decay'], cfg['nesterov']]

    elif kind == 'rmsp':
        optimizer = keras.optimizers.RMSprop(lr=0.001, rho=cfg['rho'], decay=cfg['decay'])
        name_fields = [cfg['rho'], cfg['decay']]

    elif kind == 'Adadelta':
        optimizer = keras.optimizers.Adadelta(rho=cfg['rho'], decay=cfg['decay'])
        name_fields = [cfg['rho'], cfg['decay']]

    elif kind == 'adam':
        optimizer = keras.optimizers.Adam(amsgrad=cfg['amsgrad'], decay=cfg['decay'],
                                          beta_1=cfg['beta_1'], beta_2=cfg['beta_2'])
        name_fields = [cfg['amsgrad'], cfg['beta_1'], cfg['beta_2'], cfg['decay']]

    elif kind == 'Adagrad':
        optimizer = keras.optimizers.Adagrad(decay=cfg['decay'])
        name_fields = [cfg['decay']]

    elif kind == 'Adamax':
        optimizer = keras.optimizers.Adamax(decay=cfg['decay'],
                                            beta_1=cfg['beta_1'], beta_2=cfg['beta_2'])
        name_fields = [cfg['beta_1'], cfg['beta_2'], cfg['decay']]

    elif kind == 'Nadam':
        optimizer = keras.optimizers.Nadam(schedule_decay=cfg['schedule_decay'],
                                           beta_1=cfg['beta_1'], beta_2=cfg['beta_2'])
        name_fields = [cfg['beta_1'], cfg['beta_2'], cfg['schedule_decay']]

    else:
        raise ValueError("Unknown optimizer %r in config %s" % (kind, str(cfg)))

    return optimizer, [str(field) for field in name_fields]


def _build_model(act):
    """Build the classifier: two Dense(units) -> activation -> Dropout(0.2)
    blocks followed by a softmax layer with `num_classes` outputs.

    `act` must be a key of `_ACTIVATION_FACTORIES` (KeyError otherwise).
    """
    make_activation = _ACTIVATION_FACTORIES[act]

    inputs = Input(shape=x_train.shape[1:])
    x = inputs
    for _ in range(2):
        x = Dense(units)(x)
        x = make_activation()(x)
        x = Dropout(0.2)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inputs, outputs=predictions)


def _format_remaining(elapsed, done, total):
    """Return the 'Remaining time: ...' message, extrapolating linearly from
    `elapsed` seconds spent on `done` of `total` runs."""
    projected_total = round((total / done) * elapsed)
    remaining = max(0, round(projected_total - elapsed))
    minutes, seconds = divmod(remaining, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    return ('Remaining time: %d days %d hours %02d minutes %02d seconds'
            % (days, hours, minutes, seconds))


# CSVLogger opens its file when training starts and does not create missing
# directories, so make sure the target directory exists up front.
os.makedirs(LOG_DIR, exist_ok=True)

start_time = time.time()
counter = 0
total_items = len(activations) * experiments * len(configs)

for cfg in configs:

    for act in activations:

        for i in range(experiments):

            print("Training for activation %s with config %s, experiment %d" % (act, str(cfg), i))

            # Start every run from a fresh backend session; the optimizer and
            # the model must be created after this point.
            K.clear_session()
            K.reset_uids()

            optimizer, name_fields = _build_optimizer(cfg)
            model_name = '_'.join(['std_sample_wise', act, cfg['optimizer']]
                                  + name_fields
                                  + [str(i + start), str(units)])

            model = _build_model(act)
            model.compile(loss='categorical_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])

            # The test split is passed as validation data, so its loss and
            # accuracy are written to the CSV log after every epoch.
            csv_logger = CSVLogger('%s/%s.csv' % (LOG_DIR, model_name), append=False)
            history = model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                verbose=0,
                                validation_data=(x_test, y_test), callbacks=[csv_logger])

            counter += 1
            print(_format_remaining(time.time() - start_time, counter, total_items))

Training for activation ThresholdedReLU with config {'optimizer': 'Nadam', 'beta_1': 0.9, 'beta_2': 0.95, 'schedule_decay': 0.004}, experiment 0
Remaining time: 0 days 0 hours 15 minutes 40 seconds
Training for activation ThresholdedReLU with config {'optimizer': 'Nadam', 'beta_1': 0.9, 'beta_2': 0.95, 'schedule_decay': 0.004}, experiment 1
Remaining time: 0 days 0 hours 11 minutes 35 seconds
Training for activation ThresholdedReLU with config {'optimizer': 'Nadam', 'beta_1': 0.9, 'beta_2': 0.95, 'schedule_decay': 0.004}, experiment 2
Remaining time: 0 days 0 hours 07 minutes 42 seconds
Training for activation ThresholdedReLU with config {'optimizer': 'Nadam', 'beta_1': 0.9, 'beta_2': 0.95, 'schedule_decay': 0.004}, experiment 3
Remaining time: 0 days 0 hours 03 minutes 50 seconds
Training for activation ThresholdedReLU with config {'optimizer': 'Nadam', 'beta_1': 0.9, 'beta_2': 0.95, 'schedule_decay': 0.004}, experiment 4
Remaining time: 0 days 0 hours 00 minutes 00 seconds
