In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
%matplotlib inline
from sklearn.model_selection import train_test_split
from datetime import datetime
from tqdm import tqdm
from sklearn.utils import shuffle
import nltk


In [2]:
# %load DNN.py
class DNN():
    """Chain a list of layer objects into one feed-forward TF 1.x graph.

    Every layer must provide ``set_input(tensor)`` and ``forward()``; the output
    of one layer is handed to the next layer as its input.

    Args:
        X_train: Input data. Only its shape is used when ``as_placeholders`` is
            true; otherwise the object itself becomes the network input.
        y_train: Targets, handled the same way as ``X_train``.
        layers: Layer objects in the order they are applied.
        as_placeholders: If true, create ``tf.placeholder`` inputs whose leading
            dimension is ``None`` and whose other dimensions follow the data.
        dtypes: ``(input dtype, target dtype)`` used for the placeholders.
        **kwargs: Accepted for call-site compatibility; currently ignored.
    """

    number = 0  # number of DNN instances created so far

    def __init__(self,X_train,y_train,layers,as_placeholders=True,dtypes=(tf.float32,tf.float32),**kwargs):
        self.X_train = X_train
        self.y_train = y_train
        if as_placeholders:
            self.X = tf.placeholder(dtype=dtypes[0],shape=(None,)+X_train.shape[1:])
            self.y = tf.placeholder(dtype=dtypes[1],shape=(None,)+y_train.shape[1:])
        else:
            self.X = X_train
            self.y = y_train
        self.layers = layers
        self.activations = [self.X]
        DNN.number +=1

    def forward(self):
        """Connect all layers and return ``[X, out_0, out_1, ...]``.

        The list is rebuilt from the input on every call, so calling this more
        than once does not leave outputs of an earlier call in
        ``self.activations``.
        """
        self.activations = [self.X]
        for layer in self.layers:
            layer.set_input(self.activations[-1])
            self.activations.append(layer.forward())
        return self.activations

    def cost(self):
        """Mean softmax cross-entropy, using the last activation as logits and ``self.y`` as labels."""
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.activations[-1],labels=self.y))

    def squared_cost(self):
        """Mean squared difference between the last activation and ``self.y``."""
        return tf.reduce_mean(tf.squared_difference(self.activations[-1], self.y))

    def kl_divergence(self,p, q):
        """Return ``sum(p * log(p / q))`` over all elements.

        Entries where ``p`` is exactly zero contribute 0 (the usual
        ``0 * log 0 = 0`` convention) instead of turning the whole sum into NaN.
        All other entries are computed exactly as ``p * log(p / q)``.
        """
        # Replace exact zeros by 1 inside the log only; the outer factor p is still 0 there.
        p_inside_log = tf.where(tf.equal(p, 0.0), tf.ones_like(p), p)
        return tf.reduce_sum(p * tf.log(p_inside_log / q))
class DenseLayer():
    """Fully connected layer: activation of ``x @ W + b``, followed by dropout.

    Args:
        units: Number of output units.
        activation_function: Callable applied to the pre-activation, or ``None``
            for a linear layer.
        batch_norm: If true, normalise ``x @ W`` with the statistics of the
            current batch and learned ``scale`` / ``beta`` instead of adding the bias.
        keep_prob: Value passed to ``tf.nn.dropout``.
        initialize_weights: Optional constant used to initialise ``W``; Xavier
            initialisation is used when it is ``None``.
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self,units,activation_function=tf.nn.relu,batch_norm=False,keep_prob=1.0,initialize_weights=None):
        self.units = units
        self.keep_prob = keep_prob
        self.activation_function = activation_function
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Dense-'+str(DenseLayer.number)
        self.initialize_weights = initialize_weights
        self.x = None  # filled in by set_input(); forward() checks it
        DenseLayer.number+=1

    def set_input(self,x):
        """Store the input tensor and create this layer's variables.

        A rank-4 input is flattened to ``[-1, prod(shape[1:])]`` first.
        """
        self.x = x
        if(len(x.shape)==4):
            shape = self.x.get_shape().as_list()
            dim = np.prod(shape[1:])
            self.x = tf.reshape(tensor=self.x,shape=[-1,dim])

        with tf.variable_scope(self.variable_scope_name):
            self.init_W((self.x.get_shape().as_list()[1],self.units))
            self.init_b(self.units)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.units]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.units]))

    def init_W(self,shape):
        """Create the ``weight`` variable of the given shape and a histogram summary for it."""
        if self.initialize_weights is None:
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.w)

    def init_b(self,shape):
        """Create the ``bias`` variable (``shape`` entries, all 0.1) and a histogram summary for it."""
        init = tf.constant(0.1,shape=[shape])
        self.b = tf.get_variable('bias', initializer=init)
        tf.summary.histogram('bias',self.b)

    def forward(self):
        """Build and return the layer output, or print a notice and return ``None`` if no input was set."""
        if self.x is None:
            print('no input defined')
            return None

        if(self.batch_norm):
            # NOTE: the mean/variance always come from the current batch (axis 0),
            # so small batches give noisy estimates.
            self.z = tf.matmul(self.x,self.w)
            self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0])
            self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
        else:
            self.z = tf.nn.xw_plus_b(self.x,self.w,self.b)

        if self.activation_function is None:
            self.activation = self.z
        else:
            self.activation  = self.activation_function(self.z)
        self.activation = tf.nn.dropout(self.activation,self.keep_prob)
        tf.summary.histogram('activations',self.activation)
        return self.activation
        
class ConvLayer():
    """2-D convolution with stride 1, optional batch normalisation, activation and dropout.

    Args:
        kernel_size: Int (square kernel) or ``(height, width)`` tuple.
        number_of_kernels: Number of output channels.
        padding: Padding mode passed to ``tf.nn.conv2d``.
        activation_function: Callable applied to the pre-activation, or ``None``.
        batch_norm: If true, normalise the convolution output with the
            statistics of the current batch instead of adding the bias.
        keep_prob: Value passed to ``tf.nn.dropout``.
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self,kernel_size,number_of_kernels,padding='SAME',activation_function=tf.nn.relu,batch_norm = False,keep_prob=1.0):
        self.kernel_size = kernel_size
        self.number_of_kernels = number_of_kernels
        self.padding = padding
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Conv-'+str(ConvLayer.number)
        self.x = None  # filled in by set_input(); forward() checks it
        ConvLayer.number+=1

    def set_input(self,x):
        """Store the input tensor and create kernel, bias and (optionally) batch-norm variables."""
        with tf.variable_scope(self.variable_scope_name):
            self.x = x
            in_channels = x.get_shape().as_list()[-1]
            if(isinstance(self.kernel_size,tuple)):
                self.init_Kernel(self.kernel_size+(in_channels,self.number_of_kernels))
            else:
                self.init_Kernel((self.kernel_size,self.kernel_size,in_channels,self.number_of_kernels))
            self.init_b(self.number_of_kernels)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.number_of_kernels]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.number_of_kernels]))

    def init_Kernel(self,shape):
        """Create the Xavier-initialised ``kernel`` variable and a histogram summary for it."""
        self.kernel=tf.get_variable('kernel',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        tf.summary.histogram('kernel',self.kernel)

    def init_b(self,shape):
        """Create the ``bias`` variable (``shape`` entries, all 0.1) and a histogram summary for it."""
        self.b = tf.get_variable('bias',shape=[shape],initializer=tf.constant_initializer(0.1))
        tf.summary.histogram('bias',self.b)

    def forward(self):
        """Build and return the layer output, or print a notice and return ``None`` if no input was set."""
        if self.x is None:
            print('no input defined')
            # Return here: there is no activation to hand back without an input.
            return None

        self.z = tf.nn.conv2d(self.x , self.kernel, [1, 1, 1, 1], padding=self.padding)
        if self.batch_norm:
            # Statistics are taken over batch, height and width, i.e. per channel.
            self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0,1,2])
            self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
        else:
            self.z = tf.nn.bias_add(self.z, self.b)

        if self.activation_function is None:
            self.activation = self.z
        else:
            self.activation  = self.activation_function(self.z)
        self.activation = tf.nn.dropout(self.activation,self.keep_prob)
        tf.summary.histogram('activations',self.activation)
        return self.activation
        
class PoolingLayer():
    """Max or average pooling over the two middle dimensions of a rank-4 input.

    Args:
        kernel_size: Int (square window) or ``(height, width)`` tuple.
        stride: Int (same stride in both directions) or ``(height, width)`` tuple.
        padding: Padding mode passed to the pooling op.
        pooling: ``'MAX'`` or ``'AVG'``.
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self,kernel_size,stride,padding='SAME',pooling='MAX'):
        self.kernel_size = kernel_size
        self.stride = stride
        self.pooling = pooling
        self.padding = padding
        self.variable_scope_name = 'Pool-'+str(PoolingLayer.number)
        self.x = None  # filled in by set_input(); forward() checks it
        PoolingLayer.number+=1

    def set_input(self,x):
        """Store the input tensor."""
        with tf.variable_scope(self.variable_scope_name):
            self.x = x

    def forward(self):
        """Build and return the pooled tensor.

        Prints a notice and returns ``None`` if no input was set.

        Raises:
            ValueError: If ``pooling`` is neither ``'MAX'`` nor ``'AVG'``.
        """
        if self.x is None:
            print('no input defined')
            return None

        if(isinstance(self.kernel_size,tuple)):
            window = [1]+list(self.kernel_size)+[1]
        else:
            window = [1,self.kernel_size,self.kernel_size,1]
        if(isinstance(self.stride,tuple)):
            strides = [1]+list(self.stride)+[1]
        else:
            strides = [1,self.stride,self.stride,1]

        if (self.pooling == 'MAX'):
            self.activation = tf.nn.max_pool(self.x,window,strides,padding=self.padding)
        elif (self.pooling == 'AVG'):
            self.activation = tf.nn.avg_pool(self.x,window,strides,padding=self.padding)
        else:
            # Fail with a clear message instead of an AttributeError on self.activation.
            raise ValueError("pooling must be 'MAX' or 'AVG', got %r" % (self.pooling,))
        return self.activation
    
class EmbeddingLayer():
    """Embedding lookup whose output is shaped for a convolutional or recurrent layer.

    Args:
        vocabulary_size: Number of rows of the embedding matrix.
        embedding_diamension: Number of columns of the embedding matrix.
        pretrained_word2vec: If true, the matrix starts at zero and is meant to be
            filled by running ``embedding_init`` with ``embedding_placeholder`` fed;
            otherwise it is Xavier-initialised.
        as_sequences: ``'static'`` (list of per-step tensors), ``'dynamic'``
            (lookup result unchanged) or anything else (a trailing dimension is
            added so a convolution can be applied).
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self,vocabulary_size,embedding_diamension,pretrained_word2vec=True,as_sequences=None):
        self.embedding_diamension = embedding_diamension
        self.vocabulary_size = vocabulary_size
        self.pretrained_word2vec = pretrained_word2vec
        self.as_sequences = as_sequences
        self.x = None  # filled in by set_input(); forward() checks it
        # Use this class's own counter; borrowing PoolingLayer's counter gave every
        # embedding layer the same scope name unless pooling layers were created in between.
        self.variable_scope_name = 'Embedding-'+str(EmbeddingLayer.number)
        self.init_Embeddings((self.vocabulary_size,self.embedding_diamension))
        EmbeddingLayer.number+=1

    def set_input(self,x):
        """Store the input ids; for sequence output also remember ``x.shape[1]`` as the sequence length."""
        with tf.variable_scope(self.variable_scope_name):
            self.x = x
            if self.as_sequences:
                self.sequence_length = x.shape[1]

    def init_Embeddings(self,shape):
        """Create the embedding matrix ``self.W`` with the given ``(rows, columns)`` shape."""
        matrix_shape = list(shape)
        with tf.variable_scope(self.variable_scope_name):
            if(self.pretrained_word2vec):
                self.W = tf.Variable(tf.constant(0.0, shape=matrix_shape),trainable=True, name="embedding_weights")
                self.embedding_placeholder = tf.placeholder(tf.float32, matrix_shape)
                self.embedding_init = self.W.assign(self.embedding_placeholder)
            else:
                self.W = tf.get_variable("embedding_weights", shape=matrix_shape,initializer=tf.contrib.layers.xavier_initializer())

    def forward(self):
        """Build and return the embedded input, or print a notice and return ``None`` if no input was set."""
        if self.x is None:
            print('no input defined')
            return None

        # Named "activation" to follow the pattern of the other layers.
        self.activation = tf.nn.embedding_lookup(self.W, self.x)
        if self.as_sequences == 'static':
            # Reorder to step-major, flatten, then cut into one tensor per step.
            self.activation = tf.transpose(self.activation, [1,0,2])
            self.activation = tf.reshape(self.activation , [-1,self.embedding_diamension])
            self.activation  = tf.split(self.activation ,self.sequence_length,0)
        elif self.as_sequences == 'dynamic':
            pass
        else:
            # A convolution needs 4 dimensions.
            self.activation = tf.expand_dims(self.activation, -1)
        return self.activation
class RnnLayer():
    """Single recurrent layer (LSTM or GRU) that outputs the last time step.

    Args:
        units: Number of units of the cell.
        activation_function: Passed to the cell as ``activation``.
        cell_type: ``'LSTM'`` or ``'GRU'``.
        keep_prob: Output keep probability of the dropout wrapper.
        rnn_type: ``'static'`` (input is a list of per-step tensors) or
            ``'dynamic'`` (input is a single tensor indexed as ``[:, step, :]``).
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self,units,activation_function = None,cell_type = 'LSTM',keep_prob=1.0,rnn_type='static'):
        self.units = units
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.cell_type = cell_type
        self.rnn_type = rnn_type
        self.variable_scope_name = 'Rnn-'+str(RnnLayer.number)
        self.x = None  # filled in by set_input(); forward() checks it
        RnnLayer.number+=1

    def set_input(self,x):
        """Store the input."""
        with tf.variable_scope(self.variable_scope_name):
            self.x = x

    def forward(self):
        """Build the recurrent network and return the output of the last step.

        Prints a notice and returns ``None`` if no input was set.

        Raises:
            ValueError: If ``cell_type`` or ``rnn_type`` is not supported.
        """
        if self.x is None:
            print('no input defined')
            return None
        # Validate up front so a typo gives a clear error instead of an undefined name later.
        if self.cell_type not in ('LSTM', 'GRU'):
            raise ValueError("cell_type must be 'LSTM' or 'GRU', got %r" % (self.cell_type,))
        if self.rnn_type not in ('static', 'dynamic'):
            raise ValueError("rnn_type must be 'static' or 'dynamic', got %r" % (self.rnn_type,))

        # Build inside this layer's scope so the cell variables are named per layer
        # and do not clash with those of another RnnLayer in the same graph.
        # NOTE(review): this prefixes the variable names with the scope name, so
        # checkpoints written by the previous version will not match - confirm none are in use.
        with tf.variable_scope(self.variable_scope_name):
            if self.cell_type == 'LSTM':
                cell = tf.nn.rnn_cell.LSTMCell(self.units,activation=self.activation_function)
            else:
                cell = tf.nn.rnn_cell.GRUCell(self.units,activation=self.activation_function)
            cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=self.keep_prob)

            if (self.rnn_type == 'static'):
                self.output, self.states = tf.nn.static_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[-1]
            else:
                # TODO: pass sequence_length once it is available on this layer.
                self.output, self.states = tf.nn.dynamic_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[:,-1,:]
        return self.activation
    

In [3]:
# Show the installed TensorFlow version; the cells in this notebook use TF 1.x graph-mode calls such as tf.placeholder and tf.Session.
tf.__version__

'1.4.0'

In [4]:
import os
from tensorflow.examples.tutorials.mnist import input_data

# Folder holding the MNIST archives. Set the MNIST_DATA_DIR environment variable
# to use a different location; the default is the path this notebook was written with.
MNIST_DATA_DIR = os.environ.get('MNIST_DATA_DIR', 'D:/programming/python/datasets/mnist_data')
mnist = input_data.read_data_sets(MNIST_DATA_DIR, one_hot=True)

X_train = mnist.train.images
y_train = mnist.train.labels
X_test = mnist.test.images
y_test = mnist.test.labels
# X_train = np.concatenate((X_train,X_test))
# y_train = np.concatenate((y_train,y_test))
# Turn the one-hot training labels into a column of class ids, shape (n_samples, 1).
# y_test is left one-hot.
y_train = np.argmax(y_train,axis=1).reshape((-1,1))

Extracting D:/programming/python/datasets/mnist_data\train-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\train-labels-idx1-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-labels-idx1-ubyte.gz


In [5]:
class SAE():
    """Dense autoencoder built from two ``DNN`` stacks (encoder and mirrored decoder).

    Args:
        X_train: 2-D training data; ``X_train.shape[1]`` is the input width.
        layer_dimensions: Widths of the encoder layers. The last entry is the
            bottleneck (no activation); the decoder mirrors the others and ends
            with a linear layer of the input width.

    After ``fit`` the encoded training data is available as ``extract_feature``.
    """

    def __init__(self,X_train,layer_dimensions):
        self.X_train = X_train
        self.input_dimensions = X_train.shape[1]
        self.layer_dimensions = layer_dimensions
        self.init_layers()
        with tf.variable_scope('encoder'):
            self.encoder = DNN(X_train,X_train,self.encoder_layers)
            self.encoder_activations = self.encoder.forward()
            self.encoder_output = self.encoder_activations[-1]
        with tf.variable_scope('decoder'):
            # The decoder consumes the encoder output tensor and reuses the encoder's
            # target placeholder, so no new placeholders are created here.
            self.decoder = DNN(self.encoder_output,self.encoder.y,self.decoder_layers,as_placeholders=False)
            self.decoder_activations = self.decoder.forward()

    def init_layers(self):
        """Create the encoder and decoder layer objects from ``layer_dimensions``."""
        hidden_widths = self.layer_dimensions[:-1]

        self.encoder_layers = [DenseLayer(width) for width in hidden_widths]
        self.encoder_layers.append(DenseLayer(self.layer_dimensions[-1],activation_function=None)) # bottleneck layer

        self.decoder_layers = [DenseLayer(width) for width in reversed(hidden_widths)]
        self.decoder_layers.append(DenseLayer(self.input_dimensions,activation_function=None)) # output layer, no activation

    def fit(self,num_epochs,batch_size):
        """Train on ``self.X_train`` with Adam on the mean squared reconstruction error.

        After every epoch the merged summaries for the full training set are
        written under ``summaries/test/``. When training ends the encoded
        training data is stored in ``self.extract_feature`` and the session is
        saved to ``./model/my-test-model``.

        Args:
            num_epochs: Number of passes over the training data.
            batch_size: Number of rows per training step (the last batch of an
                epoch may be smaller).
        """
        import os

        loss = self.decoder.squared_cost()
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer()
        train = optimizer.minimize(loss)

        # Compares the index of the largest value per row in reconstruction and input.
        correct_pred = tf.equal(tf.argmax(self.decoder_activations[-1], 1), tf.argmax(self.encoder.X, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()

        saver = tf.train.Saver()

        # Train on the data given to this object rather than on a notebook-level dataset.
        data = np.asarray(self.X_train)
        num_examples = data.shape[0]
        full_feed = {self.encoder.X : data,self.encoder.y : data}

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Write summaries to file so they can be inspected with TensorBoard.
            log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
            test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)
            try:
                for epoch in tqdm(range(num_epochs)):
                    # New random order every epoch.
                    order = np.random.permutation(num_examples)
                    for start in range(0, num_examples, batch_size):
                        batch_x = data[order[start:start+batch_size]]
                        sess.run(train,feed_dict={self.encoder.X : batch_x,self.encoder.y : batch_x})

                    summary = sess.run(merged,feed_dict=full_feed)
                    test_writer.add_summary(summary, epoch)
            finally:
                # Flush and release the event file even if training is interrupted.
                test_writer.close()

            self.extract_feature = sess.run(self.encoder_output,feed_dict=full_feed)
            # Make sure the checkpoint folder exists before saving into it.
            os.makedirs('./model', exist_ok=True)
            saver.save(sess, './model/my-test-model')
        
        

In [6]:
class ClusteringLayer():
    """Soft cluster assignment layer using a Student's t kernel.

    For input rows ``x_i`` and cluster centres ``c_j`` the output is
    ``(1 + ||x_i - c_j||^2 / alpha) ** (-(alpha + 1) / 2)``, normalised so that
    every row sums to 1.

    Args:
        n_clusters: Number of cluster centres.
        alpha: Degrees of freedom of the kernel.
        initialize_weights: Optional constant used to initialise the centres;
            Xavier initialisation is used when it is ``None``.
    """

    number = 0  # gives every instance its own variable scope name

    def __init__(self, n_clusters, alpha=1.0,initialize_weights = None):
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initialize_weights = initialize_weights
        self.variable_scope_name = 'Clustering-'+str(ClusteringLayer.number)
        self.x = None  # filled in by set_input(); forward() checks it
        ClusteringLayer.number+=1

    def set_input(self,x):
        """Store the input and create the ``(n_clusters, input width)`` centre matrix."""
        self.x = x

        with tf.variable_scope(self.variable_scope_name):
            self.init_Clusters((self.n_clusters,self.x.get_shape().as_list()[1]))

    def init_Clusters(self,shape):
        """Create the ``weight`` variable holding the cluster centres and a histogram summary for it."""
        if self.initialize_weights is None:
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.clusters)

    def forward(self):
        """Build and return the soft assignments, or print a notice and return ``None`` if no input was set."""
        if self.x is None:
            print('no input defined')
            return None

        with tf.name_scope('students_t'):
            # Squared distance of every row to every centre: (batch, n_clusters).
            squared_distance = tf.reduce_sum(tf.square(tf.expand_dims(self.x, axis=1) - self.clusters), axis=2)
            q = 1.0 / (1.0 + (squared_distance / self.alpha))
            q **= (self.alpha + 1.0) / 2.0
            # Divide every row by its sum (transpose so the row sums broadcast).
            q = tf.transpose(tf.transpose(q) / tf.reduce_sum(q, axis=1))
            return q

    def target_distribution(self,q):
        """Return ``q**2 / column sums of q``, renormalised so that every row sums to 1."""
        weight = tf.square(q) / tf.reduce_sum(q,axis=0)
        return tf.transpose( tf.transpose(weight) /tf.reduce_sum(weight,axis=1))
            
        

    

In [None]:
# Start from an empty graph and zero the per-class counters that the layer
# classes use to build their variable scope names.
tf.reset_default_graph()
for _counted_cls in (DenseLayer, ConvLayer, PoolingLayer, EmbeddingLayer, RnnLayer, SAE):
    _counted_cls.number = 0

# Build a 500-500-2000-10 autoencoder, train it for one epoch with batches of
# 256 and show the shape of the encoded training data.
sae = SAE(X_train,[500, 500, 2000, 10])
sae.fit(1,256)
sae.extract_feature.shape

In [None]:
# Read the trained encoder weights back from the checkpoint written by SAE.fit.
tf.reset_default_graph()
with tf.Session() as sess:
    saver = tf.train.import_meta_graph('./model/my-test-model.meta')  # rebuilds the saved graph
    # Load the saved values. Running only the variable initializer here would
    # yield freshly initialised weights, not the trained ones.
    saver.restore(sess, './model/my-test-model')
    graph = tf.get_default_graph()

    # Collect the 'weight' variable of every encoder layer, in collection order.
    weights=[]
    for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoder'):
        if 'weight:0' in  i.name:
            weights.append(sess.run(i))
            print(i)

In [None]:
from sklearn.cluster import KMeans
# Start from an empty graph and zero the per-class counters that the layer
# classes use to build their variable scope names.
tf.reset_default_graph()
for _counted_cls in (DenseLayer, ConvLayer, PoolingLayer, EmbeddingLayer, RnnLayer, SAE, ClusteringLayer):
    _counted_cls.number = 0

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one mapping between
    predicted cluster ids and true labels. Requires SciPy.
    # Arguments
        y_true: true labels, array-like with shape `(n_samples,)` or `(n_samples, 1)`
        y_pred: predicted labels, array-like with shape `(n_samples,)` or `(n_samples, 1)`
    # Return
        accuracy, in [0,1]
    # Raises
        AssertionError: if the two inputs differ in size
        ValueError: if the inputs are empty
    """
    # SciPy's Hungarian solver replaces sklearn.utils.linear_assignment_, which
    # newer scikit-learn versions no longer provide.
    from scipy.optimize import linear_sum_assignment

    # Flatten so that row vectors and (n, 1) column vectors are handled alike.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size
    if y_pred.size == 0:
        raise ValueError('cluster_acc needs at least one sample')

    # w[k, j] = number of samples with predicted cluster k and true label j.
    n_labels = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((n_labels, n_labels), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # Maximising the matched count equals minimising (w.max() - w).
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() * 1.0 / y_pred.size

def target_distribution_np(q):
    """Return ``q**2 / column sums of q``, renormalised so that every row sums to 1 (NumPy version)."""
    column_totals = np.sum(q, axis=0)
    sharpened = np.multiply(q, q) / column_totals
    row_totals = np.sum(sharpened, axis=1)
    # Transpose so the per-row totals broadcast along the rows, then transpose back.
    return (sharpened.T / row_totals).T

def target_distribution(q):
    """Return ``q**2 / column sums of q``, renormalised so that every row sums to 1 (graph version)."""
    with tf.name_scope('target_distribution'):
        squared = tf.square(q)
        column_totals = tf.reduce_sum(q,axis=0)
        weight = squared / column_totals
        # Transpose so the per-row totals broadcast along the rows, then transpose back.
        weight_t = tf.transpose(weight)
        row_totals = tf.reduce_sum(weight,axis=1)
        normalised_t = weight_t / row_totals
        return tf.transpose(normalised_t)

# Target distribution P. It is computed in NumPy from Q and fed back in, so it
# is treated as a constant while the gradients are computed.
p = tf.placeholder(dtype=tf.float32,shape=(None,10))

# Encoder with pretrained weights (needs `weights` from the restore cell):
# encoder = DNN(X_train,y_train,[DenseLayer(500,initialize_weights=weights[0]),DenseLayer(500,initialize_weights=weights[1]),\
#                                DenseLayer(2000,initialize_weights=weights[2]),DenseLayer(10,initialize_weights=weights[3]),\
#                                 ],dtypes=[tf.float32,tf.int32])

# Encoder with freshly initialised weights.
encoder = DNN(X_train,y_train,[DenseLayer(500),DenseLayer(500),\
                               DenseLayer(2000),DenseLayer(10,activation_function=None),\
                                ],dtypes=[tf.float32,tf.int32])

encoder_output = encoder.forward()[-1]

# Optional: initialise the cluster centres with k-means on the encoder features.
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     features = sess.run(encoder_output,feed_dict={encoder.X:X_train,encoder.y:y_train})
# n_clusters=10
# km = KMeans(n_clusters, n_init=20)
# y_pred = km.fit_predict(features)
# kmeans_weights =km.cluster_centers_
# clustering = DNN(encoder_output,encoder.y,[ClusteringLayer(10,initialize_weights=kmeans_weights)],as_placeholders=False)
clustering = DNN(encoder_output,encoder.y,[ClusteringLayer(10)],as_placeholders=False)
clustering_activations = clustering.forward()
q = clustering_activations[-1]
print(q)
print(p)

kld_cost = encoder.kl_divergence(p,q)
tf.summary.scalar('loss',kld_cost)
optimizer = tf.train.MomentumOptimizer(0.001,0.9)
#optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(kld_cost)

merged = tf.summary.merge_all()

num_epochs = 100
batch_size = 256
tol=1e-3  # not used yet

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Write summaries to file so they can be inspected with TensorBoard.
    log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
    test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)
    try:
        # Baseline on the full training set before any update.
        q_value=sess.run(q,feed_dict={encoder.X : X_train})
        p_value = target_distribution_np(q_value)
        cost=sess.run(kld_cost,feed_dict={encoder.X : X_train,p:p_value})
        batch_train_acc = cluster_acc(y_train,np.argmax(q_value,axis=1).reshape(-1,1))
        print("---> cost: "+str(cost)+"\taccuracy: "+ str(batch_train_acc)+' BEFORE TRAINING')

        # Training.
        y_pred_last = 1  # not used yet
        batches_per_epoch = int(np.ceil(mnist.train.num_examples/batch_size))
        for i in tqdm(range(num_epochs)):
            for c in range(batches_per_epoch):
                batch = mnist.train.next_batch(batch_size=batch_size)
                batch_x = batch[0]
                batch_y = np.argmax(batch[1],axis=1).reshape(-1,1)

                # Q for this batch with the current weights, then P derived from it.
                q_value=sess.run(q,feed_dict={encoder.X : batch_x})
                p_value = target_distribution_np(q_value)
                # q_value and p_value above already are what this step sees, so only
                # the cost and the update are fetched here.
                cost,_ = sess.run([kld_cost,train],feed_dict={encoder.X : batch_x , p:p_value})
                batch_train_acc = cluster_acc(batch_y,np.argmax(q_value,axis=1).reshape(-1,1))
                if c%40==0:
                    print("cost: "+str(cost)+"\taccuracy: "+ str(batch_train_acc)+" in " + str(i) + 'epoch in ' +str(c) +'batch')

            # End of epoch: evaluate on the full training set and log summaries.
            q_value=sess.run(q,feed_dict={encoder.X : X_train})
            p_value = target_distribution_np(q_value)
            summary,cost=sess.run([merged,kld_cost],feed_dict={encoder.X : X_train, p:p_value})
            test_writer.add_summary(summary, i)
            batch_train_acc = cluster_acc(y_train,np.argmax(q_value,axis=1).reshape(-1,1))
            print("---> cost: "+str(cost)+"\taccuracy: "+ str(batch_train_acc)+" in " + str(i) + 'epoch')
    finally:
        # Flush and release the event file even if training is interrupted.
        test_writer.close()
        


Tensor("students_t/transpose_1:0", shape=(?, 10), dtype=float32)
Tensor("Placeholder:0", shape=(?, 10), dtype=float32)
---> cost: 114.844	accuracy: 0.162981818182 BEFORE TRAINING


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

cost: 0.552252	accuracy: 0.16796875 in 0epoch in 0batch
cost: 45.4959	accuracy: 0.3046875 in 0epoch in 40batch
cost: 52.8123	accuracy: 0.33984375 in 0epoch in 80batch
cost: 49.8124	accuracy: 0.34765625 in 0epoch in 120batch
cost: 47.2145	accuracy: 0.33984375 in 0epoch in 160batch
cost: 46.7898	accuracy: 0.3515625 in 0epoch in 200batch
---> cost: 9506.76	accuracy: 0.333581818182 in 0epoch


  1%|▊                                                                                 | 1/100 [00:10<16:38, 10.09s/it]

cost: 43.5548	accuracy: 0.359375 in 1epoch in 0batch
cost: 43.3218	accuracy: 0.35546875 in 1epoch in 40batch
cost: 42.9128	accuracy: 0.40234375 in 1epoch in 80batch
cost: 42.134	accuracy: 0.375 in 1epoch in 120batch
cost: 42.0757	accuracy: 0.40625 in 1epoch in 160batch
cost: 36.6222	accuracy: 0.40625 in 1epoch in 200batch
---> cost: 8766.96	accuracy: 0.375836363636 in 1epoch


  2%|█▋                                                                                | 2/100 [00:20<16:22, 10.02s/it]

cost: 43.241	accuracy: 0.38671875 in 2epoch in 0batch
cost: 35.393	accuracy: 0.38671875 in 2epoch in 40batch
cost: 35.5259	accuracy: 0.41015625 in 2epoch in 80batch
cost: 39.6148	accuracy: 0.3828125 in 2epoch in 120batch
cost: 36.0751	accuracy: 0.38671875 in 2epoch in 160batch
cost: 27.7309	accuracy: 0.390625 in 2epoch in 200batch
---> cost: 7008.88	accuracy: 0.3894 in 2epoch


  3%|██▍                                                                               | 3/100 [00:29<16:07,  9.97s/it]

cost: 31.8648	accuracy: 0.37890625 in 3epoch in 0batch
cost: 32.2895	accuracy: 0.4296875 in 3epoch in 40batch
cost: 33.6887	accuracy: 0.42578125 in 3epoch in 80batch
cost: 35.1291	accuracy: 0.4453125 in 3epoch in 120batch
cost: 31.1414	accuracy: 0.390625 in 3epoch in 160batch
cost: 23.5271	accuracy: 0.45703125 in 3epoch in 200batch
---> cost: 7339.39	accuracy: 0.404436363636 in 3epoch


  4%|███▎                                                                              | 4/100 [00:39<15:55,  9.95s/it]

cost: 34.8467	accuracy: 0.421875 in 4epoch in 0batch
cost: 26.5748	accuracy: 0.375 in 4epoch in 40batch
cost: 30.3414	accuracy: 0.390625 in 4epoch in 80batch
cost: 27.3542	accuracy: 0.4296875 in 4epoch in 120batch
cost: 27.5651	accuracy: 0.4609375 in 4epoch in 160batch
cost: 33.6962	accuracy: 0.4375 in 4epoch in 200batch
---> cost: 5872.8	accuracy: 0.420836363636 in 4epoch


  5%|████                                                                              | 5/100 [00:49<15:47,  9.98s/it]

cost: 26.0774	accuracy: 0.44921875 in 5epoch in 0batch
cost: 24.803	accuracy: 0.46875 in 5epoch in 40batch
cost: 26.1252	accuracy: 0.43359375 in 5epoch in 80batch
cost: 31.9886	accuracy: 0.4453125 in 5epoch in 120batch
cost: 30.912	accuracy: 0.40234375 in 5epoch in 160batch
cost: 30.4277	accuracy: 0.4296875 in 5epoch in 200batch
---> cost: 5320.45	accuracy: 0.404181818182 in 5epoch


  6%|████▉                                                                             | 6/100 [00:59<15:37,  9.97s/it]

cost: 22.9974	accuracy: 0.44921875 in 6epoch in 0batch
cost: 25.957	accuracy: 0.43359375 in 6epoch in 40batch
cost: 24.1903	accuracy: 0.4375 in 6epoch in 80batch
cost: 28.6841	accuracy: 0.421875 in 6epoch in 120batch
cost: 24.2477	accuracy: 0.4375 in 6epoch in 160batch
cost: 32.7027	accuracy: 0.3671875 in 6epoch in 200batch
---> cost: 6845.97	accuracy: 0.380181818182 in 6epoch


  7%|█████▋                                                                            | 7/100 [01:09<15:23,  9.93s/it]

cost: 33.9521	accuracy: 0.37109375 in 7epoch in 0batch
cost: 28.6466	accuracy: 0.4609375 in 7epoch in 40batch
cost: 30.147	accuracy: 0.421875 in 7epoch in 80batch
cost: 23.2482	accuracy: 0.41796875 in 7epoch in 120batch
cost: 24.0532	accuracy: 0.44140625 in 7epoch in 160batch
cost: 24.8192	accuracy: 0.49609375 in 7epoch in 200batch
---> cost: 4517.29	accuracy: 0.3848 in 7epoch


  8%|██████▌                                                                           | 8/100 [01:19<15:11,  9.91s/it]

cost: 19.4394	accuracy: 0.4140625 in 8epoch in 0batch
cost: 30.7655	accuracy: 0.390625 in 8epoch in 40batch
cost: 25.4023	accuracy: 0.46875 in 8epoch in 80batch
cost: 29.0489	accuracy: 0.4609375 in 8epoch in 120batch
cost: 28.8428	accuracy: 0.3671875 in 8epoch in 160batch
cost: 22.88	accuracy: 0.375 in 8epoch in 200batch
---> cost: 4631.05	accuracy: 0.399436363636 in 8epoch


  9%|███████▍                                                                          | 9/100 [01:29<15:00,  9.89s/it]

cost: 21.9721	accuracy: 0.48046875 in 9epoch in 0batch
cost: 30.0076	accuracy: 0.484375 in 9epoch in 40batch
cost: 22.0148	accuracy: 0.38671875 in 9epoch in 80batch
cost: 29.5916	accuracy: 0.41015625 in 9epoch in 120batch
cost: 24.4933	accuracy: 0.44921875 in 9epoch in 160batch
cost: 25.5285	accuracy: 0.34765625 in 9epoch in 200batch
---> cost: 4691.21	accuracy: 0.396236363636 in 9epoch


 10%|████████                                                                         | 10/100 [01:38<14:46,  9.85s/it]

cost: 19.8895	accuracy: 0.4375 in 10epoch in 0batch
cost: 30.8074	accuracy: 0.40234375 in 10epoch in 40batch
cost: 19.7563	accuracy: 0.4296875 in 10epoch in 80batch
cost: 24.7084	accuracy: 0.38671875 in 10epoch in 120batch
cost: 28.1346	accuracy: 0.375 in 10epoch in 160batch
cost: 35.8579	accuracy: 0.390625 in 10epoch in 200batch
---> cost: 6793.33	accuracy: 0.385872727273 in 10epoch


 11%|████████▉                                                                        | 11/100 [01:47<14:32,  9.81s/it]

cost: 30.3534	accuracy: 0.44921875 in 11epoch in 0batch
cost: 28.3791	accuracy: 0.40625 in 11epoch in 40batch
cost: 19.2023	accuracy: 0.39453125 in 11epoch in 80batch
cost: 25.0235	accuracy: 0.4765625 in 11epoch in 120batch
cost: 31.0903	accuracy: 0.41015625 in 11epoch in 160batch
cost: 30.5619	accuracy: 0.38671875 in 11epoch in 200batch
---> cost: 6609.24	accuracy: 0.385945454545 in 11epoch


 12%|█████████▋                                                                       | 12/100 [01:56<14:17,  9.74s/it]

cost: 32.9995	accuracy: 0.39453125 in 12epoch in 0batch
cost: 36.166	accuracy: 0.390625 in 12epoch in 40batch
cost: 22.9638	accuracy: 0.42578125 in 12epoch in 80batch
cost: 19.8943	accuracy: 0.390625 in 12epoch in 120batch
cost: 25.6064	accuracy: 0.43359375 in 12epoch in 160batch
cost: 25.5121	accuracy: 0.421875 in 12epoch in 200batch
---> cost: 5114.33	accuracy: 0.362672727273 in 12epoch


 13%|██████████▌                                                                      | 13/100 [02:06<14:05,  9.71s/it]

cost: 24.1181	accuracy: 0.38671875 in 13epoch in 0batch
cost: 21.0657	accuracy: 0.39453125 in 13epoch in 40batch
cost: 26.1289	accuracy: 0.4140625 in 13epoch in 80batch
cost: 21.2564	accuracy: 0.48046875 in 13epoch in 120batch
cost: 33.1121	accuracy: 0.4375 in 13epoch in 160batch
cost: 27.1447	accuracy: 0.40625 in 13epoch in 200batch
---> cost: 5807.26	accuracy: 0.328472727273 in 13epoch


 14%|███████████▎                                                                     | 14/100 [02:15<13:52,  9.68s/it]

cost: 29.562	accuracy: 0.3515625 in 14epoch in 0batch
cost: 20.1789	accuracy: 0.3984375 in 14epoch in 40batch
cost: 22.7198	accuracy: 0.4296875 in 14epoch in 80batch
cost: 27.4609	accuracy: 0.33203125 in 14epoch in 120batch
cost: 18.8359	accuracy: 0.375 in 14epoch in 160batch
cost: 27.9986	accuracy: 0.42578125 in 14epoch in 200batch
---> cost: 4924.94	accuracy: 0.372090909091 in 14epoch


 15%|████████████▏                                                                    | 15/100 [02:24<13:40,  9.65s/it]

cost: 23.3765	accuracy: 0.40625 in 15epoch in 0batch
cost: 24.4535	accuracy: 0.41015625 in 15epoch in 40batch
cost: 25.2053	accuracy: 0.3828125 in 15epoch in 80batch
cost: 28.439	accuracy: 0.35546875 in 15epoch in 120batch
cost: 25.886	accuracy: 0.390625 in 15epoch in 160batch
cost: 19.2613	accuracy: 0.390625 in 15epoch in 200batch
---> cost: 5706.09	accuracy: 0.3418 in 15epoch


 16%|████████████▉                                                                    | 16/100 [02:34<13:30,  9.65s/it]

cost: 25.9017	accuracy: 0.38671875 in 16epoch in 0batch
cost: 20.5681	accuracy: 0.4453125 in 16epoch in 40batch
cost: 23.9721	accuracy: 0.37109375 in 16epoch in 80batch
cost: 26.2646	accuracy: 0.421875 in 16epoch in 120batch
cost: 19.1223	accuracy: 0.40625 in 16epoch in 160batch
cost: 24.891	accuracy: 0.41015625 in 16epoch in 200batch
---> cost: 6002.3	accuracy: 0.388581818182 in 16epoch


 17%|█████████████▊                                                                   | 17/100 [02:43<13:17,  9.60s/it]

cost: 26.7187	accuracy: 0.43359375 in 17epoch in 0batch
cost: 22.0543	accuracy: 0.37890625 in 17epoch in 40batch
cost: 16.7057	accuracy: 0.42578125 in 17epoch in 80batch
cost: 24.9766	accuracy: 0.39453125 in 17epoch in 120batch
cost: 24.8493	accuracy: 0.3828125 in 17epoch in 160batch
cost: 28.478	accuracy: 0.3671875 in 17epoch in 200batch
---> cost: 4626.48	accuracy: 0.3694 in 17epoch


 18%|██████████████▌                                                                  | 18/100 [02:52<13:06,  9.59s/it]

cost: 21.6614	accuracy: 0.39453125 in 18epoch in 0batch
cost: 24.157	accuracy: 0.42578125 in 18epoch in 40batch
cost: 16.4843	accuracy: 0.4375 in 18epoch in 80batch
cost: 26.7446	accuracy: 0.421875 in 18epoch in 120batch
cost: 15.9096	accuracy: 0.44921875 in 18epoch in 160batch
cost: 18.6134	accuracy: 0.44921875 in 18epoch in 200batch
---> cost: 7000.7	accuracy: 0.365854545455 in 18epoch


 19%|███████████████▍                                                                 | 19/100 [03:01<12:54,  9.57s/it]

cost: 31.1343	accuracy: 0.40625 in 19epoch in 0batch
cost: 16.5615	accuracy: 0.39453125 in 19epoch in 40batch
cost: 21.4789	accuracy: 0.3828125 in 19epoch in 80batch
cost: 19.1711	accuracy: 0.43359375 in 19epoch in 120batch
cost: 12.9587	accuracy: 0.39453125 in 19epoch in 160batch
cost: 25.1261	accuracy: 0.3671875 in 19epoch in 200batch
---> cost: 6241.63	accuracy: 0.371763636364 in 19epoch


 20%|████████████████▏                                                                | 20/100 [03:11<12:45,  9.57s/it]

cost: 28.5132	accuracy: 0.328125 in 20epoch in 0batch
cost: 20.4278	accuracy: 0.41015625 in 20epoch in 40batch
cost: 17.9144	accuracy: 0.38671875 in 20epoch in 80batch
cost: 18.0032	accuracy: 0.45703125 in 20epoch in 120batch
cost: 13.3213	accuracy: 0.43359375 in 20epoch in 160batch
cost: 19.6606	accuracy: 0.421875 in 20epoch in 200batch
---> cost: 3781.69	accuracy: 0.3806 in 20epoch


 21%|█████████████████                                                                | 21/100 [03:20<12:34,  9.56s/it]

cost: 18.1459	accuracy: 0.40234375 in 21epoch in 0batch
cost: 17.2514	accuracy: 0.4296875 in 21epoch in 40batch
cost: 15.5157	accuracy: 0.40625 in 21epoch in 80batch
cost: 16.7743	accuracy: 0.4375 in 21epoch in 120batch
cost: 19.4388	accuracy: 0.38671875 in 21epoch in 160batch
cost: 18.2442	accuracy: 0.4453125 in 21epoch in 200batch
---> cost: 3551.56	accuracy: 0.431363636364 in 21epoch


 22%|█████████████████▊                                                               | 22/100 [03:30<12:25,  9.55s/it]

cost: 16.0894	accuracy: 0.484375 in 22epoch in 0batch
cost: 18.9278	accuracy: 0.44921875 in 22epoch in 40batch
cost: 16.2932	accuracy: 0.51171875 in 22epoch in 80batch
cost: 17.1755	accuracy: 0.4140625 in 22epoch in 120batch
cost: 24.0262	accuracy: 0.4609375 in 22epoch in 160batch
cost: 22.2811	accuracy: 0.45703125 in 22epoch in 200batch
---> cost: 3628.81	accuracy: 0.436545454545 in 22epoch


 23%|██████████████████▋                                                              | 23/100 [03:39<12:15,  9.55s/it]

cost: 17.0654	accuracy: 0.43359375 in 23epoch in 0batch
cost: 28.8673	accuracy: 0.4453125 in 23epoch in 40batch
cost: 17.9093	accuracy: 0.43359375 in 23epoch in 80batch
cost: 14.0141	accuracy: 0.44921875 in 23epoch in 120batch
cost: 19.1824	accuracy: 0.421875 in 23epoch in 160batch
cost: 15.1571	accuracy: 0.4453125 in 23epoch in 200batch
---> cost: 2956.41	accuracy: 0.4186 in 23epoch


 24%|███████████████████▍                                                             | 24/100 [03:49<12:05,  9.54s/it]

cost: 13.3029	accuracy: 0.421875 in 24epoch in 0batch
cost: 14.983	accuracy: 0.44140625 in 24epoch in 40batch
cost: 20.1588	accuracy: 0.44921875 in 24epoch in 80batch
cost: 17.9239	accuracy: 0.4765625 in 24epoch in 120batch
cost: 13.731	accuracy: 0.42578125 in 24epoch in 160batch
cost: 12.3364	accuracy: 0.390625 in 24epoch in 200batch
---> cost: 2838.27	accuracy: 0.398945454545 in 24epoch


 25%|████████████████████▎                                                            | 25/100 [03:58<11:56,  9.55s/it]

cost: 11.5172	accuracy: 0.4375 in 25epoch in 0batch
cost: 14.8553	accuracy: 0.421875 in 25epoch in 40batch
cost: 13.2692	accuracy: 0.4296875 in 25epoch in 80batch
cost: 15.4738	accuracy: 0.42578125 in 25epoch in 120batch
cost: 15.0603	accuracy: 0.4140625 in 25epoch in 160batch
cost: 13.9173	accuracy: 0.49609375 in 25epoch in 200batch
---> cost: 2420.8	accuracy: 0.416709090909 in 25epoch


 26%|█████████████████████                                                            | 26/100 [04:08<11:46,  9.54s/it]

cost: 12.7509	accuracy: 0.45703125 in 26epoch in 0batch
cost: 12.1267	accuracy: 0.50390625 in 26epoch in 40batch
cost: 18.5636	accuracy: 0.4375 in 26epoch in 80batch
cost: 14.8211	accuracy: 0.390625 in 26epoch in 120batch
cost: 18.106	accuracy: 0.39453125 in 26epoch in 160batch
cost: 16.9155	accuracy: 0.40234375 in 26epoch in 200batch
---> cost: 3189.42	accuracy: 0.406418181818 in 26epoch


 27%|█████████████████████▊                                                           | 27/100 [04:17<11:36,  9.54s/it]

cost: 16.6359	accuracy: 0.40234375 in 27epoch in 0batch
cost: 16.5697	accuracy: 0.421875 in 27epoch in 40batch
cost: 17.5778	accuracy: 0.3515625 in 27epoch in 80batch
cost: 24.4602	accuracy: 0.4375 in 27epoch in 120batch
cost: 18.3768	accuracy: 0.4140625 in 27epoch in 160batch
cost: 16.3886	accuracy: 0.41796875 in 27epoch in 200batch
---> cost: 3665.43	accuracy: 0.376927272727 in 27epoch


 28%|██████████████████████▋                                                          | 28/100 [04:26<11:26,  9.53s/it]

cost: 17.3359	accuracy: 0.42578125 in 28epoch in 0batch
cost: 14.9769	accuracy: 0.39453125 in 28epoch in 40batch
cost: 14.7968	accuracy: 0.3671875 in 28epoch in 80batch
cost: 22.7102	accuracy: 0.37890625 in 28epoch in 120batch
cost: 22.2554	accuracy: 0.46484375 in 28epoch in 160batch


In [None]:
def cluster_acc(y_true, y_pred):
    """
    Calculate unsupervised clustering accuracy.

    Cluster ids are arbitrary, so the predicted ids are first matched
    one-to-one to the true labels with the assignment that maximises the
    number of agreeing samples; the accuracy is the fraction of samples that
    agree under that matching.

    # Arguments
        y_true: true labels, array-like of non-negative integers (or
            integer-valued floats); flattened to shape `(n_samples,)`
        y_pred: predicted cluster ids, same requirements and same number of
            elements as `y_true`
    # Return
        accuracy, in [0,1]
    # Raises
        AssertionError: if `y_true` and `y_pred` differ in size
        ValueError: if the inputs are empty
    """
    # scipy's solver replaces sklearn.utils.linear_assignment_, which is no
    # longer shipped by current scikit-learn releases.
    from scipy.optimize import linear_sum_assignment

    # Both vectors are used as array indices below, so force integer dtype and
    # a flat shape (column vectors such as (n, 1) are accepted).
    y_true = np.asarray(y_true).astype(np.int64).reshape(-1)
    y_pred = np.asarray(y_pred).astype(np.int64).reshape(-1)
    assert y_pred.size == y_true.size
    if y_pred.size == 0:
        raise ValueError('cluster_acc requires at least one sample')

    # Contingency table: w[p, t] = number of samples in cluster p with label t.
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    # np.add.at accumulates correctly when the same (p, t) pair repeats.
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises total cost, so turn counts into costs by
    # subtracting them from the largest count.
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size

# Metrics for the clustering evaluation below (accuracy_score is imported but
# not used in this cell).
from sklearn.metrics import accuracy_score 
from sklearn.metrics import normalized_mutual_info_score as nmi

print('performing k-means clustering on the extracted features')

n_clusters = 10
print(sae.extract_feature.shape)

# Cluster the extracted features; n_init=20 asks scikit-learn for 20
# differently-initialised runs.
km = KMeans(n_clusters=n_clusters, n_init=20)
y_pred = km.fit_predict(sae.extract_feature)

# Flatten the training labels to 1-D so they line up with y_pred.
y = y_train.reshape(-1)

# '%s' applies str() to the score, matching plain string concatenation.
print('K-means clustering result on extracted features: NMI = %s' % nmi(y, y_pred))
print('K-means clustering result on extracted features: accuracy = %s' % cluster_acc(y, y_pred))


In [None]:
import time
from sklearn.manifold import TSNE
from ggplot import *

def tsne(x,y,n_sne):
    """
    Project the first `n_sne` rows of `x` to 2-D with t-SNE and show a
    scatter plot coloured by the matching entries of `y`.

    # Arguments
        x: feature array; only `x[:n_sne]` is embedded
        y: labels aligned with `x`, either 1-D or a column vector; values are
            truncated to int32 before being used as plot categories
        n_sne: number of leading samples to embed and plot
    # Return
        None; the chart is displayed via `chart.show()`
    """
    time_start = time.time()
    # Named `embedder` so the local does not shadow this function's own name.
    embedder = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    tsne_results = embedder.fit_transform(x[:n_sne])
    # These report the shapes of the full inputs, not of the embedded subset.
    print(x.shape)
    print(y.shape)
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

    # Keep labels as integers and flatten them. Concatenating them with the
    # float embedding would promote them to float (legend entries like "3.0")
    # and would fail outright for 1-D label vectors.
    labels = np.asarray(y[:n_sne]).astype(np.int32).reshape(-1)

    # Build the frame column by column so each column keeps its own dtype;
    # labels become strings so they are treated as discrete categories.
    df_tsne = pd.DataFrame(
        {
            'x-tsne': tsne_results[:, 0],
            'y-tsne': tsne_results[:, 1],
            'label': [str(value) for value in labels.tolist()],
        },
        columns=['x-tsne', 'y-tsne', 'label'],
    )
    chart = ggplot( df_tsne, aes(x='x-tsne', y='y-tsne', color='label') ) \
            + geom_point(size=70,alpha=0.1) \
            + ggtitle("tSNE dimensions colored by digit")
    chart.show()
# Baseline: t-SNE of the raw training inputs (first 5000 samples).
tsne(x=X_train, y=y_train, n_sne=5000)
# Comparison: t-SNE of the features extracted by `sae` for the same samples.
tsne(x=sae.extract_feature, y=y_train, n_sne=5000)