In [1]:
import numpy as np
import pandas as pd
 
import keras
import keras.backend as K
from keras.layers import Input, Convolution2D, Activation, MaxPooling2D, Dense, BatchNormalization, Dropout
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.models import Model
from keras.utils import np_utils
from keras.constraints import maxnorm
from keras.regularizers import l2
from keras.callbacks import LearningRateScheduler
from keras.layers.normalization import BatchNormalization

print(keras.__version__)

Using TensorFlow backend.


2.1.4


In [2]:
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

In [3]:
N_train = 30000
training_inputs = X_train[0:N_train,:,:] / 255.0
training_targets = np_utils.to_categorical(y_train)[0:N_train]

val_inputs = X_train[(N_train+1):42000,:,:] / 255.0
val_targets = np_utils.to_categorical(y_train)[(N_train+1):42000]

In [4]:
training_inputs = training_inputs.reshape(training_inputs.shape[0], 784)
val_inputs = val_inputs.reshape(val_inputs.shape[0], 784)

### Layer by layer pretraining Models (greedy layer-wise training)

#### Layer 1

In [5]:
input_img = Input(shape = (784, ))
distorted_input1 = Dropout(.1)(input_img)
encoded1 = Dense(800, activation = 'sigmoid')(distorted_input1)
encoded1_bn = BatchNormalization()(encoded1)
decoded1 = Dense(784, activation = 'sigmoid')(encoded1_bn)

autoencoder1 = Model(input=input_img, output=decoded1)
encoder1 = Model(input=input_img, output=encoded1_bn)



#### Layer 2

In [6]:
encoded1_input = Input(shape = (800,))
distorted_input2 = Dropout(.2)(encoded1_input)
encoded2 = Dense(400, activation='sigmoid')(distorted_input2)
encoded2_bn = BatchNormalization()(encoded2)
decoded2 = Dense(800, activation='sigmoid')(encoded2_bn)

autoencoder2 = Model(input=encoded1_input, output=decoded2)
encoder2 = Model(input=encoded1_input, output=encoded2_bn)



#### Layer 3

In [7]:
encoded2_input = Input(shape = (400,))
distorted_input3 = Dropout(.3)(encoded2_input)
encoded3 = Dense(200, activation='sigmoid')(distorted_input3)
encoded3_bn = BatchNormalization()(encoded3)
decoded3 = Dense(400, activation='sigmoid')(encoded3_bn)

autoencoder3 = Model(input=encoded2_input, output=decoded3)
encoder3 = Model(input=encoded2_input, output=encoded3_bn)



#### Deep Autoencoder

In [8]:
encoded1_da = Dense(800, activation='sigmoid')(input_img)
encoded1_da_bn = BatchNormalization()(encoded1_da)
encoded2_da = Dense(400, activation='sigmoid')(encoded1_da_bn)
encoded2_da_bn = BatchNormalization()(encoded2_da)
encoded3_da = Dense(200, activation='sigmoid')(encoded2_da_bn)
encoded3_da_bn = BatchNormalization()(encoded3_da)
decoded3_da = Dense(400, activation='sigmoid')(encoded3_da_bn)
decoded2_da = Dense(800, activation='sigmoid')(decoded3_da)
decoded1_da = Dense(784, activation='sigmoid')(decoded2_da)

deep_autoencoder = Model(input=input_img, output=decoded1_da)



In [9]:
nad_encoded1_da = Dense(800, activation='sigmoid')(input_img)
nad_encoded1_da_bn = BatchNormalization()(nad_encoded1_da)
nad_encoded2_da = Dense(400, activation='sigmoid')(nad_encoded1_da_bn)
nad_encoded2_da_bn = BatchNormalization()(nad_encoded2_da)
nad_decoded2_da = Dense(800, activation='sigmoid')(nad_encoded2_da_bn)
nad_decoded1_da = Dense(784, activation='sigmoid')(nad_decoded2_da)

nad_deep_autoencoder = Model(input=input_img, output=nad_decoded1_da)



In [10]:
sgd1 = SGD(lr = 5, decay = 0.5, momentum = 0.85, nesterov = True)
sgd2 = SGD(lr = 5, decay = 0.5, momentum = 0.85, nesterov = True)
sgd3 = SGD(lr = 5, decay = 0.5, momentum = 0.85, nesterov = True)

autoencoder1.compile(loss='binary_crossentropy', optimizer = sgd1)
autoencoder2.compile(loss='binary_crossentropy', optimizer = sgd2)
autoencoder3.compile(loss='binary_crossentropy', optimizer = sgd3)

deep_autoencoder.compile(loss='binary_crossentropy', optimizer = sgd1)
nad_deep_autoencoder.compile(loss='binary_crossentropy', optimizer=sgd1)

# what will happen to the learning rates under this decay schedule?
lr = 5
for i in range(12):
    lr = lr - lr*.15
    print(lr)

4.25
3.6125
3.0706249999999997
2.6100312499999996
2.2185265624999997
1.8857475781249997
1.60288544140625
1.3624526251953124
1.1580847314160156
0.9843720217036133
0.8367162184480713
0.7112087856808607


### Training first autoencoder

In [11]:
autoencoder1.fit(training_inputs, training_inputs, nb_epoch=8, batch_size=512, validation_split=0.3, shuffle=True)

  if __name__ == '__main__':


Train on 21000 samples, validate on 9000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x2918bc96588>

In [12]:
first_layer_code = encoder1.predict(training_inputs)
print(first_layer_code.shape)

(30000, 800)


### Training second autoencoder
encoder1에서 나온 first_layer_code를 second autoencoder의 input 및 output으로 넣어서 학습시킴

In [13]:
autoencoder2.fit(first_layer_code, first_layer_code, nb_epoch=8, batch_size=512, validation_split=0.25, shuffle=True)

  if __name__ == '__main__':


Train on 22500 samples, validate on 7500 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x291a1d8f320>

In [14]:
second_layer_code = encoder2.predict(first_layer_code)
print(second_layer_code.shape)

(30000, 400)


### Training third autoencoder
encoder2에서 나온 second_layer_code를 third autoencoder의 input 및 output으로 넣어서 학습시킴

In [15]:
autoencoder3.fit(second_layer_code, second_layer_code, nb_epoch=8, batch_size=512, validation_split=0.3, shuffle=True)

  if __name__ == '__main__':


Train on 21000 samples, validate on 9000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x291a19929e8>

### Setting the weights of the deep autoencoder

In [16]:
deep_autoencoder.layers[1].set_weights(autoencoder1.layers[2].get_weights()) # first dense layer (800)
deep_autoencoder.layers[2].set_weights(autoencoder1.layers[3].get_weights()) # first bn layer
deep_autoencoder.layers[3].set_weights(autoencoder2.layers[2].get_weights()) # second dense layer
deep_autoencoder.layers[4].set_weights(autoencoder2.layers[3].get_weights()) # second bn layer
deep_autoencoder.layers[5].set_weights(autoencoder3.layers[2].get_weights()) # third dense layer
deep_autoencoder.layers[6].set_weights(autoencoder3.layers[3].get_weights()) # third bn layer
deep_autoencoder.layers[7].set_weights(autoencoder3.layers[4].get_weights()) # first decoder
deep_autoencoder.layers[8].set_weights(autoencoder2.layers[4].get_weights()) # second decoder
deep_autoencoder.layers[9].set_weights(autoencoder1.layers[4].get_weights()) # third decoder

### Setting up the weights of the not-as-deep autoencoder

In [17]:
nad_deep_autoencoder.layers[1].set_weights(autoencoder1.layers[2].get_weights()) # first dense layer
nad_deep_autoencoder.layers[2].set_weights(autoencoder1.layers[3].get_weights()) # first bn layer
nad_deep_autoencoder.layers[3].set_weights(autoencoder2.layers[2].get_weights()) # second dense layer
nad_deep_autoencoder.layers[4].set_weights(autoencoder2.layers[3].get_weights()) # second bn layer
nad_deep_autoencoder.layers[5].set_weights(autoencoder2.layers[4].get_weights()) # second decoder
nad_deep_autoencoder.layers[6].set_weights(autoencoder1.layers[4].get_weights()) # third decoder

In [18]:
decoded_inputs = nad_deep_autoencoder.predict(training_inputs[0:25,])

In [19]:
decoded_inputs

array([[ 0.17241056,  0.12108864,  0.1097716 , ...,  0.1520464 ,
         0.19927366,  0.12669806],
       [ 0.27290544,  0.13680121,  0.1488803 , ...,  0.15962237,
         0.20814535,  0.1475452 ],
       [ 0.261457  ,  0.12972115,  0.13423651, ...,  0.20445113,
         0.17800608,  0.16758651],
       ..., 
       [ 0.21481082,  0.17583218,  0.13250706, ...,  0.28173161,
         0.24388289,  0.23236974],
       [ 0.28083202,  0.21616349,  0.21145223, ...,  0.31591725,
         0.32515559,  0.16630042],
       [ 0.3799876 ,  0.17458279,  0.14970441, ...,  0.23168245,
         0.21439865,  0.18295565]], dtype=float32)

## On to 'fine-tuning' for classification 
전체 모델을 supervised learning으로 fine-tuning

In [20]:
dense1 = Dense(500, activation='relu')(nad_decoded1_da)
dense1_drop = Dropout(.3)(dense1)
dense2 = Dense(10, activation='sigmoid')(dense1_drop)

classifier = Model(input=input_img, output=dense2)
sgd4 = SGD(lr=.1, decay=0.001, momentum=0.95, nesterov=True)
classifier.compile(loss='categorical_crossentropy', optimizer=sgd4, metrics=['accuracy'])

classifier.fit(training_inputs, training_targets, nb_epoch=6, batch_size=600, validation_split=0.25, shuffle=True)



Train on 22500 samples, validate on 7500 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x291a3b886d8>

In [21]:
classifier.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 800)               628000    
_________________________________________________________________
batch_normalization_7 (Batch (None, 800)               3200      
_________________________________________________________________
dense_14 (Dense)             (None, 400)               320400    
_________________________________________________________________
batch_normalization_8 (Batch (None, 400)               1600      
_________________________________________________________________
dense_15 (Dense)             (None, 800)               320800    
_________________________________________________________________
dense_16 (Dense)             (None, 784)               627984    
__________

In [49]:
val_preds = classifier.predict(val_inputs)

In [51]:
predictions = np.argmax(val_preds, axis=1)
true_digits = np.argmax(val_targets, axis=1)
predictions[0:25]

array([7, 3, 9, 6, 4, 4, 1, 7, 0, 9, 3, 5, 8, 2, 7, 9, 7, 7, 8, 5, 7, 9, 6,
       6, 6], dtype=int64)

In [52]:
true_digits[0:25]

array([7, 3, 9, 6, 4, 4, 1, 7, 0, 9, 3, 5, 8, 2, 7, 4, 7, 7, 8, 5, 7, 9, 6,
       6, 6], dtype=int64)