This notebook covers deep embedded clustering (DEC) on face images. The paper is - https://arxiv.org/pdf/1511.06335.pdf

The source code is hugely influenced by https://github.com/XifengGuo/DEC-keras/blob/master/DEC.py

In [1]:
from time import time
import numpy as np
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, UpSampling2D, Lambda, RepeatVector, Reshape, Conv2DTranspose
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, UpSampling2D, Lambda, RepeatVector, Reshape, Conv2DTranspose
from keras.models import Model
from keras import backend as K
from sklearn.datasets import fetch_olivetti_faces
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.metrics import binary_crossentropy, kullback_leibler_divergence
from keras import regularizers
from keras.callbacks import TensorBoard, EarlyStopping
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


In [2]:
data = fetch_olivetti_faces()

In [3]:
len(data.images)

400

In [4]:
images = [np.reshape(i, (64, 64, 1)) for i in data.images]
images = np.array(images)
print (images.shape)

(400, 64, 64, 1)


In [5]:
input_img = Input(shape=(64, 64, 1))  # adapt this if using `channels_first` image data format

x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
#x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
#x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# at this point the representation is (4, 4, 8) i.e. 128-dimensional

x = Conv2D(64, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
#x = UpSampling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu',padding='same')(x)
#x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='relu', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer=Adam(), loss='binary_crossentropy')
encoder = Model(input_img, Flatten()(encoded))

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        640       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 64, 64, 64)        0         
__________

In [7]:
encoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        640       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 65536)             0         
Total params: 74,496
Trainable params: 74,496
Non-trainable params: 0
_________________________________________________________________


In [8]:
class ClusteringLayer(Layer):
    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)
    
    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True
        
    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


In [9]:
n_clusters = 40
alpha = 1

clustering_layer = ClusteringLayer(n_clusters, alpha=alpha, name='clustering')(encoder.output)
DEC = Model(inputs=encoder.input, outputs=clustering_layer) 

In [10]:
DEC.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        640       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 65536)             0         
_________________________________________________________________
clustering (ClusteringLayer) (None, 40)                2621440   
Total para

In [11]:
def loss(y_true, y_pred):
    weight = y_pred ** 2 / K.sum(y_pred, axis=0)
    y_true = K.transpose(K.transpose(weight) / K.sum(weight, axis=1))
    return kullback_leibler_divergence(y_true, y_pred)

In [12]:
DEC.compile(optimizer=Adam(),loss=loss)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(images, data.target, test_size=.2, random_state = 123, shuffle=True)

In [14]:
autoencoder.fit(X_train, X_train, epochs=20, validation_split=.2,callbacks=[EarlyStopping(monitor="val_loss",patience=5,mode='min')])

Instructions for updating:
Use tf.cast instead.
Train on 256 samples, validate on 64 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a36efaf98>

In [15]:
km = KMeans(n_clusters)
km.fit(encoder.predict(X_train))
DEC.get_layer('clustering').set_weights([km.cluster_centers_])

In [16]:
DEC.fit(X_train, y_train, epochs=20, validation_split=.2,callbacks=[EarlyStopping(monitor="val_loss",patience=5,mode='min')])

Train on 256 samples, validate on 64 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a37c2a978>

In [17]:
testpred = DEC.predict(X_test)
testpred = testpred.argmax(axis=1)

In [18]:
testpred

array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])

In [19]:
y_test

array([23,  4, 30, 15, 34, 30,  5, 28, 37,  9, 17, 21, 21,  3, 18, 34, 19,
       16,  4,  0, 19, 28,  7, 17,  1,  4,  1, 23, 24, 32, 17, 10, 28, 19,
       34, 26, 33, 33,  7, 29, 23, 29, 26, 15, 25,  3, 35, 24, 39, 24,  2,
       10, 27, 23, 28,  2,  1, 30, 19,  8,  3,  2,  5, 22, 39, 27, 12, 21,
       10, 15,  7, 26, 23,  5, 24, 17, 20, 32, 11, 32])

In [33]:
q = np.random.random((10,40))
q = K.constant(q)
weight = q ** 2 / K.sum(q, axis=0)
p = K.transpose(K.transpose(weight) / K.sum(weight, axis=1))
print (K.eval(kullback_leibler_divergence(p, q)))