In [10]:
import keras
import keras.backend as K
import numpy as np

import utils
import loggingreporter 

# Experiment configuration: optimizer settings, activation and network shape.
cfg = {
    'SGD_BATCHSIZE': 128,
    'SGD_LEARNINGRATE': 0.001,
    'NUM_EPOCHS': 5000,

    # Hidden-layer activation function. Alternative tried: 'tanh'
    'ACTIVATION': 'relu',

    # Number of hidden neurons in each layer (one entry per hidden layer).
    #
    # Open questions:
    #   * Try a CNN? Judging from loggingreporter.py (lines 32-33), that code
    #     seems to work only for dense layers.
    #   * Why widths of 20 rather than powers of two (e.g. 16, 32)?
    #
    # Other architectures tried:
    #   [1024, 20, 20, 20]
    #   [32, 28, 24, 20, 16, 12, 8, 8]
    #   [20, 20, 20, 20, 20, 20]        # 0.967 w. 128
    'LAYER_DIMS': [64, 32, 32, 16, 16],  # 0.967 w. 128
}

# Architecture tag built from the layer widths, e.g. '64-32-32-16-16'.
ARCH_NAME = '-'.join(str(width) for width in cfg['LAYER_DIMS'])

# Load the MNIST train / test splits through the project helper.
trn, tst = utils.get_mnist()

# Notes on architectures other than dense networks:
#   * The code does not seem to support CNNs. In the MNIST experiment the 28x28
#     images are unrolled into 1x784 vectors, so a dense network can be applied
#     to the MNIST data directly.
#   * RNNs have not been considered. Implementing them would mean rewriting the
#     functions in loggingreporter.py, and we may not have time for that.
#     Nonetheless, Pokemon is assumed to be a good alternative.
#   * To really test a CNN, the Gaussian kernel used to estimate MI would become
#     2D; kde.py is easy to modify, but loggingreporter.py is not.

# Where to save activation and weights data
cfg['SAVE_DIR'] = 'rawdata/{}_{}'.format(cfg['ACTIVATION'], ARCH_NAME)

(10000, 784)


In [5]:
# Keep the weight initialisation and the optimizer identical across experiments so
# that they do not become confounding factors.
# IBnet uses TruncatedNormal and Adam.
# NOTE(review): the optimizer compiled below is SGD with Nesterov momentum, not
# Adam -- confirm which one is intended.

# One input unit per feature column of the training matrix.
input_layer = keras.layers.Input(shape=(trn.X.shape[1],))

# Stack the hidden Dense layers; `clayer` always refers to the most recent tensor.
# Try L2 norm / dropout here.
clayer = input_layer
for n in cfg['LAYER_DIMS']:
    # Truncated normal scaled by 1/sqrt(layer width).
    # NOTE(review): every layer receives the same seed (10); layers whose kernels
    # have identical shapes may therefore start from identical weights -- confirm
    # this is intended.
    kernel_init = keras.initializers.TruncatedNormal(
        mean=0.0,
        stddev=1 / np.sqrt(float(n)),
        seed=10,
    )
    hidden = keras.layers.Dense(
        n,
        activation=cfg['ACTIVATION'],
        kernel_initializer=kernel_init,
        bias_initializer='zeros',
        # kernel_regularizer=regularizers.l2(0.01),  # add L2 norm regularization
    )
    clayer = hidden(clayer)
    # clayer = keras.layers.Dropout(0.1)(clayer)

# Softmax classifier with one unit per class.
output_layer = keras.layers.Dense(trn.nb_classes, activation='softmax')(clayer)

model = keras.models.Model(inputs=input_layer, outputs=output_layer)

# Plain SGD alternative:
# optimizer = keras.optimizers.SGD(lr=cfg['SGD_LEARNINGRATE'])
sgd = keras.optimizers.SGD(
    lr=cfg['SGD_LEARNINGRATE'],
    momentum=0.9,
    nesterov=True,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 1024)              803840    
_________________________________________________________________
dense_12 (Dense)             (None, 20)                20500     
_________________________________________________________________
dense_13 (Dense)             (None, 20)                420       
_________________________________________________________________
dense_14 (Dense)             (None, 20)                420       
_________________________________________________________________
dense_15 (Dense)             (None, 10)                210       
Total params: 825,390
Trainable params: 825,390
Non-trainable params: 0
_________________________________________________________________


In [None]:
def do_report(epoch):
    """Decide whether activity should be logged for the given epoch.

    Logging gets sparser as training progresses, mainly to make things run
    faster:

    * epochs below 20    -> every epoch
    * epochs below 100   -> every 5th epoch
    * epochs below 200   -> every 10th epoch
    * epochs 200 and up  -> every 100th epoch

    Args:
        epoch: Epoch index to test.

    Returns:
        True if the epoch should be logged, False otherwise.
    """
    if epoch < 20:
        return True

    # (exclusive upper bound, logging stride) pairs, checked in order.
    schedule = ((100, 5), (200, 10))
    for upper_bound, stride in schedule:
        if epoch < upper_bound:
            return epoch % stride == 0

    # Beyond the last bound: only every 100th epoch.
    return epoch % 100 == 0
    
# Callback handed to Keras during training. It receives the configuration, both
# data splits and the epoch filter `do_report`.
# NOTE(review): presumably it saves activations/weights under cfg['SAVE_DIR'] on
# the epochs for which `do_report` returns True -- confirm in loggingreporter.py.
reporter = loggingreporter.LoggingReporter(
    cfg=cfg,
    trn=trn,
    tst=tst,
    do_save_func=do_report,
)

# Options for model.fit, collected in one place.
fit_options = dict(
    verbose=2,
    batch_size=cfg['SGD_BATCHSIZE'],
    epochs=cfg['NUM_EPOCHS'],
    # Hold out a fraction of the training data for validation instead of using
    # the test split directly:
    # validation_data=(tst.X, tst.Y),
    validation_split=0.2,
    shuffle=True,
    callbacks=[reporter],
)

# `r` is the History object returned by Keras; r.history is read when plotting.
r = model.fit(x=trn.X, y=trn.Y, **fit_options)


In [None]:
import matplotlib.pyplot as plt
matplotlib.rc("savefig", dpi=300)
%matplotlib inline

NUM_EPOCHS = cfg['NUM_EPOCHS']
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# summarize history for accuracy
axes[0].plot(range(1,NUM_EPOCHS+1), r.history['acc'], '-x')
axes[0].plot(range(1,NUM_EPOCHS+1), r.history['val_acc'], '-+')
axes[0].set_title('Model accuracy')
axes[0].set_ylabel('Accuracy')
axes[0].set_xlabel('Epoch')
axes[0].set_xticks(range(1,NUM_EPOCHS+1))
axes[0].grid(linestyle='-')
axes[0].legend(['train', 'validation'], loc='upper left')

# summarize history for loss
axes[1].plot(range(1,NUM_EPOCHS+1), r.history['loss'], '-x')
axes[1].plot(range(1,NUM_EPOCHS+1), r.history['val_loss'], '-+')
axes[1].set_title('Model loss')
axes[1].set_ylabel('Loss')
axes[1].set_xlabel('Epoch')
axes[1].set_xticks(range(1,NUM_EPOCHS+1))
axes[1].grid(linestyle='-')
axes[1].legend(['train', 'validation'], loc='upper left')

plt.savefig('MNIST_acc_loss_plot', bbox_inches='tight')