In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
%matplotlib inline
from sklearn.model_selection import train_test_split
from datetime import datetime
from tqdm import tqdm
from sklearn.utils import shuffle
import nltk


In [2]:
# %load DNN.py
class DNN():
    
    number = 0
    
    def __init__(self,X_train,y_train,layers,as_placeholders=True,dtypes=[tf.float32,tf.float32],**kwargs):
        self.X_train = X_train
        self.y_train = y_train
        if as_placeholders:
            self.X = tf.placeholder(dtype=dtypes[0],shape=(None,)+X_train.shape[1:])
            self.y = tf.placeholder(dtype=dtypes[1],shape=(None,)+y_train.shape[1:])
        else:
            self.X = X_train
            self.y = y_train
        self.layers = layers
        self.activations = [self.X]
        DNN.number +=1
            
    def forward(self):       
        for i,layer in enumerate(self.layers):
            layer.set_input(self.activations[i])
            self.activations.append(layer.forward())
        return self.activations
    
    def cost(self):
        #return tf.reduce_sum(tf.square(self.activations[-1]-self.y))
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.activations[-1],labels=self.y))
    
    def squared_cost(self):
        return tf.reduce_mean(tf.squared_difference(self.activations[-1], self.y))
    
    def kl_divergence(self,p, q): 
        return tf.reduce_sum(p * tf.log(p/q))
        #return -0.5 * tf.reduce_sum(1 + p - tf.square(q) - tf.exp(p))
#         temp = p/q
#         return tf.reduce_mean(-tf.nn.softmax_cross_entropy_with_logits(labels=p, logits=temp))
       # return tf.contrib.distributions.kl_divergence(p, q,)
class DenseLayer():
    
    number = 0
    
    def __init__(self,units,activation_function=tf.nn.relu,batch_norm=False,keep_prob=1.0,initialize_weights=None):
        self.units = units
        self.keep_prob = keep_prob
        self.activation_function = activation_function
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Dense-'+str(DenseLayer.number)
        self.initialize_weights = initialize_weights
        DenseLayer.number+=1
        
    def set_input(self,x):
        self.x = x
        if(len(x.shape)==4):
            shape = self.x.get_shape().as_list()        
            dim = np.prod(shape[1:])
            self.x = tf.reshape(tensor=self.x,shape=[-1,dim])

        with tf.variable_scope(self.variable_scope_name):  
            self.init_W((self.x.get_shape().as_list()[1],self.units))
            self.init_b(self.units)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.units]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.units]))

        
    def init_W(self,shape):
        #another way to do this with get variable
        #self.w= tf.Variable(tf.multiply(tf.random_normal(shape),0.01),dtype=tf.float32)'
        if self.initialize_weights is None: 
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.w)
    
    def init_b(self,shape):
        #self.b = tf.Variable(tf.fill([shape],0.1),dtype=tf.float32)
        init = tf.constant(0.1,shape=[shape])
        self.b = tf.get_variable('bias', initializer=init)
        tf.summary.histogram('bias',self.b)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            #batch norm not working properly
            #or maybe it is working properly but it needs bigger batch size. like 64. This makes sense because you make estimates of
            #mean and variance for every z calculation. This means that the bigger the batch size the more accurate the estimate
            if(self.batch_norm):
                self.z = tf.matmul(self.x,self.w)
                self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0])
                self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
            else:
                self.z = tf.nn.xw_plus_b(self.x,self.w,self.b)
            if self.activation_function == None:
                self.activation = self.z
            else:
                self.activation  = self.activation_function(self.z)
            self.activation = tf.nn.dropout(self.activation,self.keep_prob)
            tf.summary.histogram('activations',self.activation)
            return self.activation
        
class ConvLayer():

    number = 0
    def __init__(self,kernel_size,number_of_kernels,padding='SAME',activation_function=tf.nn.relu,batch_norm = False,keep_prob=1.0):
        self.kernel_size = kernel_size
        self.number_of_kernels = number_of_kernels
        self.padding = padding
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Conv-'+str(ConvLayer.number)
        ConvLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
            if(isinstance(self.kernel_size,tuple)):
                self.init_Kernel(self.kernel_size+(x.get_shape().as_list()[-1],self.number_of_kernels))
            else:
                self.init_Kernel((self.kernel_size,self.kernel_size,x.get_shape().as_list()[-1],self.number_of_kernels)) 
            self.init_b(self.number_of_kernels)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.number_of_kernels]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.number_of_kernels]))
        
    def init_Kernel(self,shape):
        self.kernel=tf.get_variable('kernel',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        tf.summary.histogram('kernel',self.kernel)
        
    def init_b(self,shape):
        self.b = tf.get_variable('bias',shape=[shape],initializer=tf.constant_initializer(0.1))
        tf.summary.histogram('bias',self.b)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if self.batch_norm:
                
                self.z = tf.nn.conv2d(self.x , self.kernel, [1, 1, 1, 1], padding=self.padding)
                self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0,1,2])
                self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
                
            else:
                self.z = tf.nn.conv2d(self.x , self.kernel, [1, 1, 1, 1], padding=self.padding)
                self.z = tf.nn.bias_add(self.z, self.b)  
                    
            if self.activation_function == None:
                self.activation = self.z
            else:
                self.activation  = self.activation_function(self.z)
            #dropout
            self.activation = tf.nn.dropout(self.activation,self.keep_prob)
            tf.summary.histogram('activations',self.activation)
        return self.activation
        
class PoolingLayer():
    
    number = 0
    
    def __init__(self,kernel_size,stride,padding='SAME',pooling='MAX'):
        self.kernel_size = kernel_size
        self.stride = stride
        self.pooling = pooling
        self.padding = padding
        self.variable_scope_name = 'Pool-'+str(PoolingLayer.number)
        PoolingLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if(isinstance(self.kernel_size,tuple)):
                size = list(self.kernel_size)
            else:
                size = [self.kernel_size,self.kernel_size]   
            if (self.pooling == 'MAX'):
                self.activation = tf.nn.max_pool(self.x,[1]+size+[1],[1,self.stride,self.stride,1],padding=self.padding)
            elif (self.pooling == 'AVG'):
                self.activation = tf.nn.avg_pool(self.x,[1]+size+[1],[1,self.stride,self.stride,1],padding=self.padding)
        return self.activation
    
class EmbeddingLayer():
    
    number = 0
    
    def __init__(self,vocabulary_size,embedding_diamension,pretrained_word2vec=True,as_sequences=None):
        self.embedding_diamension = embedding_diamension
        self.vocabulary_size = vocabulary_size
        self.pretrained_word2vec = pretrained_word2vec
        self.variable_scope_name = 'Embedding-'+str(PoolingLayer.number)
        self.init_Embeddings((self.vocabulary_size,self.embedding_diamension))
        self.as_sequences = as_sequences
        EmbeddingLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
            if self.as_sequences:
                self.sequence_length = x.shape[1]
        
    def init_Embeddings(self,shape):
        with tf.variable_scope(self.variable_scope_name):  
            if(self.pretrained_word2vec):             
                self.W = tf.Variable(tf.constant(0.0, shape=[self.vocabulary_size, self.embedding_diamension]),trainable=True, name="embedding_weights")
                self.embedding_placeholder = tf.placeholder(tf.float32, [self.vocabulary_size, self.embedding_diamension])
                self.embedding_init = self.W.assign(self.embedding_placeholder)
            else:
                self.W = tf.get_variable("embedding_weights", shape=[self.vocabulary_size, self.embedding_diamension],initializer=tf.contrib.layers.xavier_initializer())
    def forward(self):
        # TODO: implement as sequences to provide data for lstm
        if self.x == None:
            print('no input defined')
        else:
            #this is called activation to follow the pattern of other layers
            self.activation = tf.nn.embedding_lookup(self.W, self.x)
            if self.as_sequences == 'static':
                self.activation = tf.transpose(self.activation, [1,0,2])
                self.activation = tf.reshape(self.activation , [-1,self.embedding_diamension])
                self.activation  = tf.split(self.activation ,self.sequence_length,0)
            elif self.as_sequences == 'dynamic':
                pass
            else:
                self.activation = tf.expand_dims(self.activation, -1)        #need 4 diamensions to apply convolution
            return self.activation
class RnnLayer():
    number = 0
    def __init__(self,units,activation_function = None,cell_type = 'LSTM',keep_prob=1.0,rnn_type='static'):
        self.units = units
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.cell_type = cell_type
        self.rnn_type = rnn_type
        self.variable_scope_name = 'Rnn-'+str(RnnLayer.number)
        RnnLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if self.cell_type == 'LSTM':
                cell = tf.nn.rnn_cell.LSTMCell(self.units,activation=self.activation_function)
                cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=self.keep_prob)
            elif self.cell_type == 'GRU':
                cell = tf.nn.rnn_cell.GRUCell(self.units,activation=self.activation_function)
                cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=self.keep_prob)
                
            #need to fix this
            #self.activation = tf.nn.dynamic_rnn(cell,self.x,sequence_length=self.sequence_length)
            if (self.rnn_type == 'static'):
                self.output, self.states = tf.nn.static_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[-1]
            elif (self.rnn_type == 'dynamic'):
                self.output, self.states = tf.nn.dynamic_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[:,-1,:]
        return self.activation
    

In [3]:
tf.__version__

'1.4.0'

In [11]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('D:/programming/python/datasets/mnist_data', one_hot=True)

X_train = mnist.train.images
y_train = mnist.train.labels
X_test = mnist.test.images
y_test = mnist.test.labels
# X_train = np.concatenate((X_train,X_test))
# y_train = np.concatenate((y_train,y_test))
y_train = np.argmax(y_train,axis=1).reshape((-1,1))

def get_batches(data_list, batch_size,shuffle=True):
    """
    Generates a batch generator for a dataset.
    """
    assert isinstance(data_list,list)
    
    size = data_list[0].shape[0]
    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(size))
        data_list = [i[shuffle_indices] for i in data_list]
        
    number_of_batches = int(np.ceil(size/batch_size))
    for i in range(number_of_batches-1):
        index = i*batch_size
        yield [i[index:index+batch_size] for i in data_list]
    yield[i[(number_of_batches-1)*batch_size:] for i in data_list]

Extracting D:/programming/python/datasets/mnist_data\train-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\train-labels-idx1-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-labels-idx1-ubyte.gz


In [102]:
class SAE():
    
    def __init__(self,X_train,layer_dimensions):
        self.X_train = X_train
        self.input_dimensions = X_train.shape[1]
        self.layer_dimensions = layer_dimensions
        self.init_layers()
        with tf.variable_scope('encoder'):
            self.encoder = DNN(X_train,X_train,self.encoder_layers)
            self.encoder_activations = self.encoder.forward()
            self.encoder_output = self.encoder_activations[-1]
        with tf.variable_scope('decoder'):
            self.decoder = DNN(self.encoder_output,self.encoder.y,self.decoder_layers,as_placeholders=False)# transfoermed x and previous y which were placeholders
            self.decoder_activations = self.decoder.forward()
    
    def init_layers(self):
        
        self.encoder_layers = []
        self.decoder_layers = []
        for i in self.layer_dimensions[:-1]:
            self.encoder_layers.append(DenseLayer(i))
        self.encoder_layers.append(DenseLayer(self.layer_dimensions[-1],activation_function=None)) ##bottleneck layer
        for i in reversed(self.layer_dimensions[:-1]):
            self.decoder_layers.append(DenseLayer(i))
        self.decoder_layers.append(DenseLayer(self.input_dimensions,activation_function=None)) #output layer no activation
        
    def fit(self,num_epochs,batch_size,starter_learning_rate=0.1,number_of_decays=3,save_path ='./model/my-test-model',log_every=20):

        loss = self.decoder.squared_cost()
        tf.summary.scalar('loss', loss)
        
        learning_rate = tf.placeholder(tf.float32)
        # Passing global_step to minimize() will increment it at each step.
        train = (
            tf.train.AdamOptimizer(learning_rate=learning_rate)
            .minimize(loss)
        )
        correct_pred = tf.equal(tf.argmax(self.decoder_activations[-1], 1), tf.argmax(self.encoder.X, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()
        
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            #write training data to file in order to visualize it with tensorboard
            log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
            #train_writer = tf.summary.FileWriter('summaries/train/logs'+log_name,sess.graph)
            test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)
            #training 
            lr = starter_learning_rate
            for j in range(number_of_decays):
                lr = starter_learning_rate* np.power(10.0, -j)
                print('learning rate: ' + str(lr)+' batch size: '+str(batch_size))
                for i in range(int(num_epochs/number_of_decays)):
                    print(str(i+1)+' epoch out of: '+str(int(num_epochs/3)))
                    for c,batch in enumerate(get_batches([self.X_train],batch_size=batch_size)):
                        batch_x = batch[0]
                        feed_dict={self.encoder.X : batch_x,self.encoder.y : batch_x,learning_rate:lr}
                        cost,_,acc= sess.run([loss,train,accuracy],feed_dict=feed_dict)
                    
                    if i % log_every == 0:
                        feed_dict={self.encoder.X : self.X_train,self.encoder.y : self.X_train}
                        summary,acc,cost = sess.run([merged,accuracy,loss],feed_dict=feed_dict)
                        test_writer.add_summary(summary, i)  
                        print('cost: '+ str(cost)+ ' accuracy: ',str(acc))
                feed_dict={self.encoder.X : self.X_train,self.encoder.y : self.X_train}
                self.extract_feature = sess.run(self.encoder_output,feed_dict=feed_dict)
                saver.save(sess, save_path)
        
        

In [74]:
class ClusteringLayer():
    
    number = 0 
    
    def __init__(self, n_clusters, alpha=1.0,initialize_weights = None):
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initialize_weights = initialize_weights
        self.variable_scope_name = 'Clustering-'+str(ClusteringLayer.number)
        ClusteringLayer.number+=1
        
    def set_input(self,x):
        
        self.x = x

        with tf.variable_scope(self.variable_scope_name):  
            self.init_Clusters((self.n_clusters,self.x.get_shape().as_list()[1]))
        
    def init_Clusters(self,shape):
        if self.initialize_weights is None: 
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.clusters)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            with tf.name_scope('students_t'):
                q = 1.0 / (1.0 + (tf.reduce_sum(tf.square(tf.expand_dims(self.x, axis=1) - self.clusters), axis=2) / self.alpha))
                q **= (self.alpha + 1.0) / 2.0
                q = tf.transpose(tf.transpose(q) / tf.reduce_sum(q, axis=1))
                return q
        
    def target_distribution(self,q):
        weight = tf.square(q) / tf.reduce_sum(q,axis=0)
        return tf.transpose( tf.transpose(weight) /tf.reduce_sum(weight,axis=1))
            
        

    

In [None]:
def reset():
    tf.reset_default_graph()
    DenseLayer.number = 0
    ConvLayer.number = 0
    PoolingLayer.number = 0
    EmbeddingLayer.number = 0
    RnnLayer.number = 0
    SAE.number = 0
#layer_wise training    
layers = [500,500,2000,10]
layer_input = X_train
epochs = 200
for i,layer in enumerate(layers):
    reset()
    sae = SAE(layer_input,[layer])
    sae.fit(epochs,256,save_path='./model/autoencoder__'+str(i))
    layer_input = sae.extract_feature

learning rate: 0.1 batch size: 256
1 epoch out of: 66
cost: 0.780974 accuracy:  0.00447273
2 epoch out of: 66
3 epoch out of: 66
4 epoch out of: 66
5 epoch out of: 66
6 epoch out of: 66
7 epoch out of: 66
8 epoch out of: 66
9 epoch out of: 66
10 epoch out of: 66
11 epoch out of: 66
12 epoch out of: 66
13 epoch out of: 66
14 epoch out of: 66
15 epoch out of: 66
16 epoch out of: 66
17 epoch out of: 66
18 epoch out of: 66
19 epoch out of: 66
20 epoch out of: 66
21 epoch out of: 66
cost: 0.0241161 accuracy:  0.0150364
22 epoch out of: 66
23 epoch out of: 66
24 epoch out of: 66
25 epoch out of: 66
26 epoch out of: 66
27 epoch out of: 66
28 epoch out of: 66
29 epoch out of: 66


In [95]:
#restore the weights i need from the encoder   
def restore_variables(file,variable_name,scope=None):
    tf.reset_default_graph()
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(file)##copies the graph
        sess.run(tf.global_variables_initializer())
        graph = tf.get_default_graph()
        variables=[]
        for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope):
            if variable_name in  i.name:
                variables.append(graph.get_tensor_by_name(i.name).eval())
                print(i)
        return variables
variables  = restore_variables('./model/autoencoder__0.meta','weight:0','encoder')

<tf.Variable 'encoder/Dense-0/weight:0' shape=(784, 500) dtype=float32_ref>


In [82]:
tf.reset_default_graph()
DenseLayer.number = 0
ConvLayer.number = 0
PoolingLayer.number = 0
EmbeddingLayer.number = 0
RnnLayer.number = 0
SAE.number = 0


sae = SAE(X_train,[500, 500, 2000, 10])
sae.fit(1,256,number_of_decays=1)
sae.extract_feature.shape

learning rate: 0.1 batch size: 256
1 epoch out of: 0
cost: 0.134247 accuracy:  0.00178182


(55000, 10)

In [96]:
weights  = restore_variables('./model/my-test-model.meta','weight:0','encoder')

<tf.Variable 'encoder/Dense-0/weight:0' shape=(784, 500) dtype=float32_ref>
<tf.Variable 'encoder/Dense-1/weight:0' shape=(500, 500) dtype=float32_ref>
<tf.Variable 'encoder/Dense-2/weight:0' shape=(500, 2000) dtype=float32_ref>
<tf.Variable 'encoder/Dense-3/weight:0' shape=(2000, 10) dtype=float32_ref>


In [97]:
from sklearn.cluster import KMeans
tf.reset_default_graph()
DenseLayer.number = 0
ConvLayer.number = 0
PoolingLayer.number = 0
EmbeddingLayer.number = 0
RnnLayer.number = 0
SAE.number = 0
ClusteringLayer.number = 0

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

def target_distribution_np(q):
    weight = np.square(q) / np.sum(q,axis=0)
    return np.transpose( np.transpose(weight) /np.sum(weight,axis=1))

# def target_distribution(q):
#     with tf.name_scope('target_distribution'):
#         weight = tf.square(q) / tf.reduce_sum(q,axis=0)
#         return tf.transpose( tf.transpose(weight) /tf.reduce_sum(weight,axis=1))

p = tf.placeholder(dtype=tf.float32,shape=(None,10))
#create encoder with pretrained weights
encoder = DNN(X_train,y_train,[DenseLayer(500,initialize_weights=weights[0]),DenseLayer(500,initialize_weights=weights[1]),\
                               DenseLayer(2000,initialize_weights=weights[2]),DenseLayer(10,initialize_weights=weights[3],activation_function=None),\
                                ],dtypes=[tf.float32,tf.int32])

# encoder = DNN(X_train,y_train,[DenseLayer(500),DenseLayer(500),\
#                                DenseLayer(2000),DenseLayer(10,activation_function=None),\
#                                 ],dtypes=[tf.float32,tf.int32])

encoder_output = encoder.forward()[-1]
#get encoder features
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    features = sess.run(encoder_output,feed_dict={encoder.X:X_train,encoder.y:y_train})

#get kmean weights to initialize clustering layer
print('initializing kmeans clusters ')
n_clusters=10
km = KMeans(n_clusters, n_init=20)
y_pred = km.fit_predict(features)
kmeans_weights =km.cluster_centers_ 
print(km.cluster_centers_.shape)

clustering = DNN(encoder_output,encoder.y,[ClusteringLayer(10,initialize_weights=kmeans_weights)],as_placeholders=False)
clustering_activations = clustering.forward()
q = clustering_activations[-1]

kld_cost = encoder.kl_divergence(p,q)
tf.summary.scalar('loss',kld_cost)
#optimizer = tf.train.MomentumOptimizer(0.001,0.9)
optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(kld_cost)

merged = tf.summary.merge_all()

num_epochs = 100
batch_size = 256
tol=1e-3
update_rate = 10 # epochs

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
    #train_writer = tf.summary.FileWriter('summaries/train/logs'+log_name,sess.graph)
    test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)
    
    #training 
    for i in tqdm(range(num_epochs)):
         
        if (i % update_rate == 0): # epochs to update the p_values
            q_value=sess.run(q,feed_dict={encoder.X : X_train})  
            p_value = target_distribution_np(q_value)
            summary,cost=sess.run([merged,kld_cost],feed_dict={encoder.X : X_train,p:p_value})  
            test_writer.add_summary(summary, i)
            batch_train_acc = cluster_acc(y_train,np.argmax(q_value,axis=1).reshape(-1,1))
            print('____________________________________________________')
            print("cost: "+str(cost)+" accuracy: "+ str(batch_train_acc)+' before epoch '+ str(i))
        
        for c,batch in enumerate(get_batches([encoder.X_train,encoder.y_train,p_value],batch_size=batch_size)): 
            batch_x = batch[0]
            batch_y = batch[1]
            batch_p = batch[2]
            
            q_value,cost,_,=sess.run([p,kld_cost,train],feed_dict={encoder.X:batch_x , p:batch_p}) 
            batch_train_acc = cluster_acc(batch_y,np.argmax(q_value,axis=1).reshape(-1,1))
            if c%40==0:
                print("cost: "+str(cost)+"\taccuracy: "+ str(batch_train_acc)+" in " + str(i) + ' epoch in ' +str(c) +'batch')
        


initializing kmeans clusters 
(10, 10)


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

____________________________________________________
cost: 57.4275 accuracy: 0.333836363636 before epoch 0
cost: 0.258204	accuracy: 0.34375 in 0 epoch in 0batch
cost: 0.996284	accuracy: 0.36328125 in 0 epoch in 40batch
cost: 0.290521	accuracy: 0.34765625 in 0 epoch in 80batch
cost: 0.161054	accuracy: 0.36328125 in 0 epoch in 120batch
cost: 0.126383	accuracy: 0.35546875 in 0 epoch in 160batch
cost: 0.107426	accuracy: 0.33984375 in 0 epoch in 200batch


  1%|▊                                                                                 | 1/100 [00:13<21:28, 13.02s/it]

KeyboardInterrupt: 

In [None]:
def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

print('performing k-means clustering on the extracted features')

n_clusters=10
print(sae.extract_feature.shape)
km = KMeans(n_clusters, n_init=20)
y_pred = km.fit_predict(sae.extract_feature)
y = y_train.reshape(-1)
from sklearn.metrics import normalized_mutual_info_score as nmi
from sklearn.metrics import accuracy_score 
print('K-means clustering result on extracted features: NMI = '+str(nmi(y, y_pred)))
print('K-means clustering result on extracted features: accuracy = '+str(cluster_acc(y, y_pred)))


In [None]:
import time
from sklearn.manifold import TSNE
from ggplot import *

def tsne(x,y,n_sne):  
    time_start = time.time()
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    tsne_results = tsne.fit_transform(x[:n_sne])
    print(x.shape)
    print(y.shape)
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
    x_tsne = np.expand_dims(tsne_results[:,0],1)
    y_tsne = np.expand_dims(tsne_results[:,1],1)
    label = np.int32(y[:n_sne])
    data = np.concatenate((x_tsne,y_tsne,label),axis=1)

    df_tsne = pd.DataFrame(data=data,columns=['x-tsne','y-tsne','label'])
    df_tsne.label  = df_tsne.label.map(lambda x:  str(x))
    chart = ggplot( df_tsne, aes(x='x-tsne', y='y-tsne', color='label') ) \
            + geom_point(size=70,alpha=0.1) \
            + ggtitle("tSNE dimensions colored by digit")
    chart.show()
tsne(X_train,y_train,5000)
tsne(sae.extract_feature,y_train,5000)