# Regularization (Dropout, Normalization)

# Library

In [None]:
import numpy as np
import tensorflow as tf

from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix

# Load Dataset and Preprocess

In [None]:
from tensorflow.keras.datasets import mnist as data

# fetch the train / test split and report the raw array shapes
(x_train, y_train), (x_test, y_test) = data.load_data()
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# dataset statistics: sample count, per-sample shape, number of distinct labels
train_n_sample = x_train.shape[0]
shape = x_train.shape[1:]
n_class = np.unique(y_train).size
print(train_n_sample, shape, n_class)

# flatten every sample into one vector and divide the values by 255
n_features = np.prod(shape)
X_train = x_train.reshape(-1, n_features) / 255.
X_test = x_test.reshape(-1, n_features) / 255.

# one-hot encode the labels by picking rows out of an identity matrix
one_hot_table = np.eye(n_class)
Y_train = one_hot_table[y_train.reshape(-1)]
Y_test = one_hot_table[y_test.reshape(-1)]

print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

# Multi-Layer Perceptron (MLP) with Regularization

## Hyper-parameter setting

In [None]:
# hyper parameter setting
# All values below are read by the later model-building and training cells.
training_epochs = 100  # number of full passes over the training set
learning_rate = 1e-1  # step size handed to the gradient-descent optimizer
display_step = 10  # the loss is printed once every `display_step` epochs

batch_size = 100  # number of samples per mini-batch

hidden_nodes = [1024, 300]  # number of units in each hidden layer
activation = tf.nn.tanh  # non-linearity intended for the hidden layers
# norm
# (placeholder: no normalization setting is defined here yet)

# dropout rate
# (placeholder: no dropout rate is defined here yet)

## Build Model

In [None]:
# Build a 3-layer fully-connected neural network: two hidden layers followed
# by a linear output layer. Hidden layer sizes come from `hidden_nodes`
# ([1024, 300]) and the hidden non-linearity from `activation`.
tf.reset_default_graph()

# placeholders: flattened input samples and one-hot encoded labels
tf_X = tf.placeholder(tf.float32, (None, np.prod(shape)))
tf_Y = tf.placeholder(tf.float32, (None, n_class))

# model
# The configured activation is applied to both hidden layers; without it the
# stacked dense layers would reduce to a single linear map.
hidden1 = tf.layers.dense(tf_X, hidden_nodes[0], activation=activation)
# norm, dropout

# second hidden layer uses the second (last) entry of `hidden_nodes`
hidden2 = tf.layers.dense(hidden1, hidden_nodes[1], activation=activation)
# norm, dropout

# output layer stays linear: the loss below expects unnormalized logits
logit = tf.layers.dense(hidden2, n_class)
pred = tf.nn.softmax(logit)

# loss and optimizer
loss = tf.losses.softmax_cross_entropy(tf_Y, logit)
# batch norm optimizer: run whatever ops are registered in UPDATE_OPS (e.g.
# batch-normalization moving-average updates) together with each training
# step. When the collection is empty this is a plain minimize step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

## Train model

In [None]:
with tf.Session() as sess:
    # every graph variable must be initialised before the first update
    sess.run(tf.global_variables_initializer())

    # mini-batch training; only whole batches are used each epoch
    epoch_losses, iter_losses = [], []
    total_batch = train_n_sample // batch_size
    for epoch in range(training_epochs):
        # draw a fresh random ordering of the training samples
        order = np.random.permutation(train_n_sample)
        epoch_X, epoch_Y = X_train[order], Y_train[order]

        for start in range(0, total_batch * batch_size, batch_size):
            stop = start + batch_size
            feed = {tf_X: epoch_X[start:stop], tf_Y: epoch_Y[start:stop]}

            # one optimizer step; keep the loss measured on this batch
            _, batch_loss = sess.run([optimizer, loss], feed_dict=feed)
            iter_losses.append(batch_loss)

        # epoch loss = mean over the batches recorded during this epoch
        epoch_losses.append(np.mean(iter_losses[-total_batch:]))

        if (epoch + 1) % display_step == 0:
            print('Epoch: {}, \t Loss: {}'.format(epoch+1, epoch_losses[-1]))

    # class probabilities for the complete train and test sets
    train_pred = sess.run(pred, feed_dict={tf_X: X_train})
    test_pred = sess.run(pred, feed_dict={tf_X: X_test})

    # write the trained variables to a checkpoint named 'model'
    saver = tf.train.Saver()
    saver.save(sess, 'model')

## Plot result

In [None]:
# plot epoch losses
def plot_epoch_losses(epoch_losses):
    """Plot the values in `epoch_losses` on a new figure and display it."""
    plt.figure()
    plt.plot(epoch_losses)
    plt.show()
# draw the curve for the losses collected by the training cell
plot_epoch_losses(epoch_losses)

# accuracy
def print_accuracy(label, pred, name=''):
    """Print the fraction of positions at which `label` equals `pred`.

    label: array of reference class ids.
    pred:  array of predicted class ids, compared element-wise with `label`.
    name:  text printed in front of the accuracy value.
    """
    matches = (label == pred)
    print(name, ' accuracy: ', np.mean(matches))
# the predictions hold one score per class in each row, so argmax over axis 1
# turns them into class ids that can be compared with the integer labels
print_accuracy(y_train.reshape(-1), np.argmax(train_pred, axis=1), 'train')
print_accuracy(y_test.reshape(-1), np.argmax(test_pred, axis=1), 'test')

# confusion matrix
def print_confusion_matrix(label, pred):
    """Print the confusion matrix that sklearn computes for `label` vs `pred`."""
    print(confusion_matrix(label, pred))
# test-set confusion matrix: integer labels against arg-max predicted classes
print_confusion_matrix(y_test.reshape(-1), np.argmax(test_pred, axis=1))