# BitTiger DS501 Week10

## Feedforward Neural Net (FNN)

In [None]:
from __future__ import print_function

import keras.callbacks as cb
from keras.datasets import mnist
from keras.layers.core import Activation, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import SGD
from keras.regularizers import l1, l2
from keras.utils import np_utils

import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

## 1. Pre-processing

### 分成 3 個 Groups:
1. A: 沒有 pre-processing
2. B: 用 normalization 把 dataset normalize 到 [0, 1] 之間
\begin{equation}x' = \frac{x - x_{min}}{x_{max} - x_{min}}\end{equation}
3. C: 用 standardization 把 dataset 變成 mean=0, variance=1 的高斯分佈
\begin{equation}x_i' = \frac{x_i - \mu}{\sigma}\end{equation}

In [None]:
def Preprocessing(group):
    """Load MNIST and apply the pre-processing scheme selected by ``group``.

    Args:
        group: 'A' leaves the raw pixel values untouched, 'B' divides them
            by 255, and 'C' standardizes every feature using the mean and
            standard deviation of the training set. Any other value behaves
            like 'A'.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``: float32 feature
        matrices with one flattened image per row and one-hot labels over
        10 classes.
    """
    from sklearn import preprocessing
    # Load MNIST dataset from Keras (https://keras.io/datasets/)
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Transform labels to one-hot
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)

    # Cast to float and flatten every image to a 1-D vector. The sample
    # count is taken from the data instead of being hard-coded.
    X_train = X_train.astype('float32').reshape(X_train.shape[0], -1)
    X_test = X_test.astype('float32').reshape(X_test.shape[0], -1)

    if group == 'B':
        X_train /= 255
        X_test /= 255
    elif group == 'C':
        # Fit the statistics on the training set only and reuse them for the
        # test set, so both splits go through the same transformation.
        scaler = preprocessing.StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [None]:
# Select the pre-processing scheme ('A', 'B' or 'C', as handled by
# Preprocessing) and load the train/test splits with it applied.
group = 'B'
X_train, X_test, y_train, y_test = Preprocessing(group)

### 檢查一下 training data

只看前 10 筆資料

In [1]:
# Preview the first 10 training samples: a handful of feature values from
# the start, middle and end of each flattened image, followed by the first
# component of that sample's label vector.
print('{:^43}'.format('X'), '|', '{:^4}'.format('y'))
print('='*50)
for row in range(10):
    sample = X_train[row]
    print('{:.2f} {:.2f} ... {:.2f} {:.2f} {:.2f}  ... {:.2f} {:.2f} '.format(
            sample[0], sample[1],
            sample[156], sample[157], sample[158],
            sample[-2], sample[-1]), '| ',
            '{:.0f}'.format(y_train[row][0])
         )

                     X                      |  y  


## 2. Define model

### Network Structure

經驗上：寬度 < 1000 個 neurons，深度 10 ~ 30 層，形狀用金字塔型

深度可以用 sample/parameter ratio 來估計，約 5 ~ 30

1. A: 1 layer
2. B: 2 layers, tower-shaped (一樣寬)
3. C: 2 layers, pyramid-shaped (越來越窄)

In [None]:
# Hidden-layer width presets. Each preset overwrites the previous one, so
# when the cell is run as-is the last preset (C) is the one in effect;
# comment out or reorder the assignments to choose another.
# A: one hidden layer (a second-layer width of 0 means no second layer)
first_layer_width = 128
second_layer_width = 0
# B: two hidden layers of equal width
first_layer_width = 128
second_layer_width = 128
# C: two hidden layers, the second narrower than the first
first_layer_width = 128
second_layer_width = 64

### Activation Function
1. ReLU
2. Sigmoid
3. Tanh

In [None]:
# Activation function choices. Later assignments overwrite earlier ones, so
# 'tanh' is in effect when the cell is run as-is; keep only the wanted line.
activation_func = 'relu'
activation_func = 'sigmoid'
activation_func = 'tanh'

### Loss Function
1. Cross entropy
2. Squared error (MSE)

In [None]:
# Loss function choices. The second assignment overwrites the first, so
# 'mean_squared_error' is in effect when the cell is run as-is.
loss_func = 'categorical_crossentropy'
loss_func = 'mean_squared_error'

### Dropout rate
可以輸入想要的 dropout rate，例如 0%, 50%, 90% 等

### Regularization
1. No regularization: no
2. L1-norm: l1
3. L2-norm: l2

### Mini-batch size
可以設定 mini-batch 的大小，例如 128, 256, 512 等

通常是 4 的倍數，這和硬體 cache 的設計有關 

### Learning Rate
可以設定 learning rate 的大小，例如 0.1, 0.01, 0.5 等

通常介於 0.01 到 0.1 之間

同一個 epoch 的 learning rate 是常數

### 這邊才是定義 model

In [None]:
def DefineModel():
    """Build and compile the feedforward classifier from module-level settings.

    Reads these globals, which must be defined before the call:
    ``first_layer_width``, ``second_layer_width``, ``activation_func``,
    ``loss_func``, ``dropout_rate``, ``weight_regularizer`` and
    ``learning_rate``.

    Returns:
        A compiled ``Sequential`` model that takes 784 input features and
        ends in a 10-unit softmax layer.
    """
    # Initialize
    model = Sequential()

    # Add first hidden layer
    model.add(Dense(first_layer_width, input_dim=784, W_regularizer=weight_regularizer))
    model.add(Activation(activation_func))
    if dropout_rate > 0:
        # Use the configured rate rather than a fixed 0.5
        model.add(Dropout(dropout_rate))

    # Add second hidden layer (skipped when its width is 0)
    if second_layer_width > 0:
        model.add(Dense(second_layer_width))
        model.add(Activation(activation_func))
        if dropout_rate > 0:
            model.add(Dropout(dropout_rate))

    # Last layer has the same dimension as the number of classes
    model.add(Dense(10))
    # Then add softmax
    model.add(Activation('softmax'))
    # Define optimizer
    opt = SGD(lr=learning_rate, clipnorm=5.)
    # Define loss function; `loss_func` is the name used by the settings cell
    model.compile(loss=loss_func, optimizer=opt, metrics=['accuracy'])

    return model