# Dropout 訓練神經網路的細節與技巧
1. 訓練過程中，隨機拿掉一些連結（將其 weight 設為 0）
2. 增加訓練的難度
3. 視為一種model自身的ensemble method
4. 在訓練過程中，隨機將某些參數設定為0，強迫模型的每個參數有更強的泛化能力
5. 簡而言之，目的為降低過擬合（overfitting）的問題

In [1]:
from keras.layers import Dropout
import keras


#n_units=10
#x = keras.layers.Dense(units = n_units,activation='relu')(x)
#x = Dropout(0.2)(x)

Using TensorFlow backend.


# 練習- Coding

In [2]:
import os
import keras

# Load the CIFAR-10 dataset through Keras. The call yields a (train, test)
# pair; each element is unpacked into inputs and labels in a later cell.
train,test = keras.datasets.cifar10.load_data()

In [3]:
def preprocessing_x(x, flatten=True):
    """Rescale inputs by 1/255 and optionally flatten each sample.

    Args:
        x: Array-like supporting division and ``reshape`` (e.g. a NumPy
            array of images). Dividing by 255 maps 8-bit pixel values
            into the range [0, 1].
        flatten: When true, collapse every axis after the first so the
            result has shape ``(len(x), -1)``.

    Returns:
        The rescaled array, flattened per sample if requested.
    """
    scaled = x / 255.
    if not flatten:
        return scaled
    # Keep the sample axis and let reshape infer the feature count.
    n_samples = len(scaled)
    return scaled.reshape((n_samples, -1))

def preprocessing_y(y, num_classes=10):
    """One-hot encode labels whose last axis has size 1.

    Args:
        y: Label array exposing ``shape``.
        num_classes: Number of classes passed to
            ``keras.utils.to_categorical``.

    Returns:
        The one-hot encoded labels when ``y.shape[-1] == 1``; otherwise
        ``y`` itself, unchanged.
    """
    needs_encoding = y.shape[-1] == 1
    if not needs_encoding:
        return y
    return keras.utils.to_categorical(y, num_classes)

In [None]:
# Split the loaded dataset into inputs and labels for each partition.
(x_train, y_train), (x_test, y_test) = train, test

# Rescale and flatten the inputs, then convert the labels with preprocessing_y.
x_train, x_test = preprocessing_x(x_train), preprocessing_x(x_test)
y_train, y_test = preprocessing_y(y_train), preprocessing_y(y_test)

In [None]:
from keras.layers import Dropout

def build_mlp(input_shape, output_units=10, num_neurons=(512, 256, 128), drp_ratio=0.2):
    """Build a fully connected classifier with dropout after each hidden layer.

    Args:
        input_shape: Shape of a single input sample, passed to
            ``keras.layers.Input``.
        output_units: Number of units in the softmax output layer.
        num_neurons: Sizes of the hidden ``Dense`` layers, in order. May be
            empty, in which case the output layer is attached directly to
            the input layer.
        drp_ratio: Rate passed to every ``Dropout`` layer.

    Returns:
        An uncompiled ``keras.models.Model`` mapping the input layer to the
        softmax output.
    """
    input_layer = keras.layers.Input(input_shape)

    # Start the chain at the input so the first hidden layer needs no special
    # case and an empty ``num_neurons`` still leaves ``x`` defined.
    x = input_layer
    for i, n_units in enumerate(num_neurons, start=1):
        x = keras.layers.Dense(units=n_units, activation='relu', name='hidden_layer' + str(i))(x)
        x = Dropout(drp_ratio)(x)

    # The output layer uses softmax to produce class probabilities.
    out = keras.layers.Dense(units=output_units, activation='softmax', name='output')(x)
    model = keras.models.Model(inputs=[input_layer], outputs=[out])

    return model

In [None]:
# Hyperparameters for the dropout experiment; consumed by the training cell.
learning_rate = 1e-3  # handed to the SGD optimizer as its learning rate
epochs = 1  # number of epochs given to model.fit
batch_size = 256  # mini-batch size given to model.fit
momentum = 0.95  # momentum value handed to the SGD optimizer
dropout_exp = 0.25  # dropout rate forwarded to build_mlp as drp_ratio

In [None]:
# Build the MLP for the flattened inputs using the experiment's dropout rate.
model = build_mlp(input_shape=x_train.shape[1:], drp_ratio=dropout_exp)
model.summary()

# NOTE(review): newer Keras releases name this argument `learning_rate`
# instead of `lr` -- confirm against the installed version.
optimizer = keras.optimizers.SGD(lr=learning_rate, nesterov=True, momentum=momentum)
model.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer=optimizer)

# Keep the History object returned by fit rather than reading it back off
# the model afterwards.
history = model.fit(x_train, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(x_test, y_test),
                    shuffle=True)

# Collect results. Validation keys are the training keys prefixed with
# "val_", so the accuracy pair must be "accuracy" / "val_accuracy".
train_loss = history.history["loss"]
valid_loss = history.history["val_loss"]
train_acc = history.history["accuracy"]
valid_acc = history.history["val_accuracy"]

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3072)              0         
_________________________________________________________________
hidden_layer1 (Dense)        (None, 512)               1573376   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
hidden_layer2 (Dense)        (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
hidden_layer3 (Dense)        (None, 128)               32896     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0   

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Loss curves: one line per recorded series, indexed by epoch position.
for series, tag in ((train_loss, 'train loss'), (valid_loss, 'valid loss')):
    plt.plot(range(len(series)), series, label=tag)
plt.legend()
plt.title('loss')
plt.show()

# Accuracy curves, drawn the same way on a fresh figure.
for series, tag in ((train_acc, 'train accuracy'), (valid_acc, 'valid accuracy')):
    plt.plot(range(len(series)), series, label=tag)
plt.legend()
plt.title('accuracy')
plt.show()