In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
%matplotlib inline
from sklearn.model_selection import train_test_split
from datetime import datetime
from tqdm import tqdm
from sklearn.utils import shuffle
import nltk


In [2]:
# %load DNN.py
class DNN():
    
    number = 0
    
    def __init__(self,X_train,y_train,layers,as_placeholders=True,dtypes=[tf.float32,tf.float32],**kwargs):
        self.X_train = X_train
        self.y_train = y_train
        if as_placeholders:
            self.X = tf.placeholder(dtype=dtypes[0],shape=(None,)+X_train.shape[1:])
            self.y = tf.placeholder(dtype=dtypes[1],shape=(None,)+y_train.shape[1:])
        else:
            self.X = X_train
            self.y = y_train
        self.layers = layers
        self.activations = [self.X]
        DNN.number +=1
            
    def forward(self):       
        for i,layer in enumerate(self.layers):
            layer.set_input(self.activations[i])
            self.activations.append(layer.forward())
        return self.activations
    
    def cost(self):
        #return tf.reduce_sum(tf.square(self.activations[-1]-self.y))
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.activations[-1],labels=self.y))
    
    def squared_cost(self):
        return tf.reduce_mean(tf.squared_difference(self.activations[-1], self.y))
    
    def kl_divergence(self,p, q): 
        return tf.reduce_sum(p * tf.log(p/q))
        #return -0.5 * tf.reduce_sum(1 + p - tf.square(q) - tf.exp(p))
#         temp = p/q
#         return tf.reduce_mean(-tf.nn.softmax_cross_entropy_with_logits(labels=p, logits=temp))
       # return tf.contrib.distributions.kl_divergence(p, q,)
class DenseLayer():
    
    number = 0
    
    def __init__(self,units,activation_function=tf.nn.relu,batch_norm=False,keep_prob=1.0,initialize_weights=None):
        self.units = units
        self.keep_prob = keep_prob
        self.activation_function = activation_function
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Dense-'+str(DenseLayer.number)
        self.initialize_weights = initialize_weights
        DenseLayer.number+=1
        
    def set_input(self,x):
        self.x = x
        if(len(x.shape)==4):
            shape = self.x.get_shape().as_list()        
            dim = np.prod(shape[1:])
            self.x = tf.reshape(tensor=self.x,shape=[-1,dim])

        with tf.variable_scope(self.variable_scope_name):  
            self.init_W((self.x.get_shape().as_list()[1],self.units))
            self.init_b(self.units)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.units]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.units]))

        
    def init_W(self,shape):
        #another way to do this with get variable
        #self.w= tf.Variable(tf.multiply(tf.random_normal(shape),0.01),dtype=tf.float32)'
        if self.initialize_weights is None: 
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.w=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.w)
    
    def init_b(self,shape):
        #self.b = tf.Variable(tf.fill([shape],0.1),dtype=tf.float32)
        init = tf.constant(0.1,shape=[shape])
        self.b = tf.get_variable('bias', initializer=init)
        tf.summary.histogram('bias',self.b)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            #batch norm not working properly
            #or maybe it is working properly but it needs bigger batch size. like 64. This makes sense because you make estimates of
            #mean and variance for every z calculation. This means that the bigger the batch size the more accurate the estimate
            if(self.batch_norm):
                self.z = tf.matmul(self.x,self.w)
                self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0])
                self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
            else:
                self.z = tf.nn.xw_plus_b(self.x,self.w,self.b)
            if self.activation_function == None:
                self.activation = self.z
            else:
                self.activation  = self.activation_function(self.z)
            self.activation = tf.nn.dropout(self.activation,self.keep_prob)
            tf.summary.histogram('activations',self.activation)
            return self.activation
        
class ConvLayer():

    number = 0
    def __init__(self,kernel_size,number_of_kernels,padding='SAME',activation_function=tf.nn.relu,batch_norm = False,keep_prob=1.0):
        self.kernel_size = kernel_size
        self.number_of_kernels = number_of_kernels
        self.padding = padding
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.batch_norm = batch_norm
        self.variable_scope_name = 'Conv-'+str(ConvLayer.number)
        ConvLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
            if(isinstance(self.kernel_size,tuple)):
                self.init_Kernel(self.kernel_size+(x.get_shape().as_list()[-1],self.number_of_kernels))
            else:
                self.init_Kernel((self.kernel_size,self.kernel_size,x.get_shape().as_list()[-1],self.number_of_kernels)) 
            self.init_b(self.number_of_kernels)
            if(self.batch_norm):
                self.epsilon = 1e-3
                self.scale = tf.get_variable('scale', initializer=tf.ones(shape=[self.number_of_kernels]))
                self.beta =  tf.get_variable('beta', initializer=tf.zeros(shape=[self.number_of_kernels]))
        
    def init_Kernel(self,shape):
        self.kernel=tf.get_variable('kernel',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        tf.summary.histogram('kernel',self.kernel)
        
    def init_b(self,shape):
        self.b = tf.get_variable('bias',shape=[shape],initializer=tf.constant_initializer(0.1))
        tf.summary.histogram('bias',self.b)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if self.batch_norm:
                
                self.z = tf.nn.conv2d(self.x , self.kernel, [1, 1, 1, 1], padding=self.padding)
                self.batch_mean, self.batch_var = tf.nn.moments(self.z,[0,1,2])
                self.z  = tf.nn.batch_normalization(self.z,self.batch_mean,self.batch_var,self.beta,self.scale,self.epsilon)
                
            else:
                self.z = tf.nn.conv2d(self.x , self.kernel, [1, 1, 1, 1], padding=self.padding)
                self.z = tf.nn.bias_add(self.z, self.b)  
                    
            if self.activation_function == None:
                self.activation = self.z
            else:
                self.activation  = self.activation_function(self.z)
            #dropout
            self.activation = tf.nn.dropout(self.activation,self.keep_prob)
            tf.summary.histogram('activations',self.activation)
        return self.activation
        
class PoolingLayer():
    
    number = 0
    
    def __init__(self,kernel_size,stride,padding='SAME',pooling='MAX'):
        self.kernel_size = kernel_size
        self.stride = stride
        self.pooling = pooling
        self.padding = padding
        self.variable_scope_name = 'Pool-'+str(PoolingLayer.number)
        PoolingLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if(isinstance(self.kernel_size,tuple)):
                size = list(self.kernel_size)
            else:
                size = [self.kernel_size,self.kernel_size]   
            if (self.pooling == 'MAX'):
                self.activation = tf.nn.max_pool(self.x,[1]+size+[1],[1,self.stride,self.stride,1],padding=self.padding)
            elif (self.pooling == 'AVG'):
                self.activation = tf.nn.avg_pool(self.x,[1]+size+[1],[1,self.stride,self.stride,1],padding=self.padding)
        return self.activation
    
class EmbeddingLayer():
    
    number = 0
    
    def __init__(self,vocabulary_size,embedding_diamension,pretrained_word2vec=True,as_sequences=None):
        self.embedding_diamension = embedding_diamension
        self.vocabulary_size = vocabulary_size
        self.pretrained_word2vec = pretrained_word2vec
        self.variable_scope_name = 'Embedding-'+str(PoolingLayer.number)
        self.init_Embeddings((self.vocabulary_size,self.embedding_diamension))
        self.as_sequences = as_sequences
        EmbeddingLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
            if self.as_sequences:
                self.sequence_length = x.shape[1]
        
    def init_Embeddings(self,shape):
        with tf.variable_scope(self.variable_scope_name):  
            if(self.pretrained_word2vec):             
                self.W = tf.Variable(tf.constant(0.0, shape=[self.vocabulary_size, self.embedding_diamension]),trainable=True, name="embedding_weights")
                self.embedding_placeholder = tf.placeholder(tf.float32, [self.vocabulary_size, self.embedding_diamension])
                self.embedding_init = self.W.assign(self.embedding_placeholder)
            else:
                self.W = tf.get_variable("embedding_weights", shape=[self.vocabulary_size, self.embedding_diamension],initializer=tf.contrib.layers.xavier_initializer())
    def forward(self):
        # TODO: implement as sequences to provide data for lstm
        if self.x == None:
            print('no input defined')
        else:
            #this is called activation to follow the pattern of other layers
            self.activation = tf.nn.embedding_lookup(self.W, self.x)
            if self.as_sequences == 'static':
                self.activation = tf.transpose(self.activation, [1,0,2])
                self.activation = tf.reshape(self.activation , [-1,self.embedding_diamension])
                self.activation  = tf.split(self.activation ,self.sequence_length,0)
            elif self.as_sequences == 'dynamic':
                pass
            else:
                self.activation = tf.expand_dims(self.activation, -1)        #need 4 diamensions to apply convolution
            return self.activation
class RnnLayer():
    number = 0
    def __init__(self,units,activation_function = None,cell_type = 'LSTM',keep_prob=1.0,rnn_type='static'):
        self.units = units
        self.activation_function = activation_function
        self.keep_prob = keep_prob
        self.cell_type = cell_type
        self.rnn_type = rnn_type
        self.variable_scope_name = 'Rnn-'+str(RnnLayer.number)
        RnnLayer.number+=1
        
    def set_input(self,x):
        with tf.variable_scope(self.variable_scope_name):         
            self.x = x
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            if self.cell_type == 'LSTM':
                cell = tf.nn.rnn_cell.LSTMCell(self.units,activation=self.activation_function)
                cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=self.keep_prob)
            elif self.cell_type == 'GRU':
                cell = tf.nn.rnn_cell.GRUCell(self.units,activation=self.activation_function)
                cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=self.keep_prob)
                
            #need to fix this
            #self.activation = tf.nn.dynamic_rnn(cell,self.x,sequence_length=self.sequence_length)
            if (self.rnn_type == 'static'):
                self.output, self.states = tf.nn.static_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[-1]
            elif (self.rnn_type == 'dynamic'):
                self.output, self.states = tf.nn.dynamic_rnn(cell,self.x,dtype=tf.float32)
                self.activation = self.output[:,-1,:]
        return self.activation
    

In [3]:
tf.__version__

'1.4.0'

In [27]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('D:/programming/python/datasets/mnist_data', one_hot=True)

X_train = mnist.train.images
y_train = mnist.train.labels
X_test = mnist.test.images
y_test = mnist.test.labels
# X_train = np.concatenate((X_train,X_test))
# y_train = np.concatenate((y_train,y_test))
y_train = np.argmax(y_train,axis=1).reshape((-1,1))

Extracting D:/programming/python/datasets/mnist_data\train-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\train-labels-idx1-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-images-idx3-ubyte.gz
Extracting D:/programming/python/datasets/mnist_data\t10k-labels-idx1-ubyte.gz


In [5]:
class SAE():
    
    def __init__(self,X_train,layer_dimensions):
        self.X_train = X_train
        self.input_dimensions = X_train.shape[1]
        self.layer_dimensions = layer_dimensions
        self.init_layers()
        with tf.variable_scope('encoder'):
            self.encoder = DNN(X_train,X_train,self.encoder_layers)
            self.encoder_activations = self.encoder.forward()
            self.encoder_output = self.encoder_activations[-1]
        with tf.variable_scope('decoder'):
            self.decoder = DNN(self.encoder_output,self.encoder.y,self.decoder_layers,as_placeholders=False)# transfoermed x and previous y which were placeholders
            self.decoder_activations = self.decoder.forward()
    
    def init_layers(self):
        
        self.encoder_layers = []
        self.decoder_layers = []
        for i in self.layer_dimensions[:-1]:
            self.encoder_layers.append(DenseLayer(i))
        self.encoder_layers.append(DenseLayer(self.layer_dimensions[-1],activation_function=None)) ##bottleneck layer
        for i in reversed(self.layer_dimensions[:-1]):
            self.decoder_layers.append(DenseLayer(i))
        self.decoder_layers.append(DenseLayer(self.input_dimensions,activation_function=None)) #output layer no activation
        
    def fit(self,num_epochs,batch_size):

        loss = self.decoder.squared_cost()
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer()
        train = optimizer.minimize(loss)

        correct_pred = tf.equal(tf.argmax(self.decoder_activations[-1], 1), tf.argmax(self.encoder.X, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()
        
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            #write training data to file in order to visualize it with tensorboard
            log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
            #train_writer = tf.summary.FileWriter('summaries/train/logs'+log_name,sess.graph)
            test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)

            #training 
            for i in tqdm(range(num_epochs)):
                for c,batch in enumerate(range(int(np.ceil(mnist.train.num_examples/batch_size)))):
                    batch = mnist.train.next_batch(batch_size=batch_size)
                    batch_x = batch[0]
                    feed_dict={self.encoder.X : batch_x,self.encoder.y : batch_x}
                    cost,_,acc= sess.run([loss,train,accuracy],feed_dict=feed_dict)

                feed_dict={self.encoder.X : self.X_train,self.encoder.y : self.X_train}
                summary,test_acc,test_cost = sess.run([merged,accuracy,loss],feed_dict=feed_dict)
                test_writer.add_summary(summary, i)  
            feed_dict={self.encoder.X : self.X_train,self.encoder.y : self.X_train}
            self.extract_feature = sess.run(self.encoder_output,feed_dict=feed_dict)
            saver.save(sess, './model/my-test-model')
        
        

In [6]:
class ClusteringLayer():
    
    number = 0 
    
    def __init__(self, n_clusters, alpha=1.0,initialize_weights = None):
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initialize_weights = initialize_weights
        self.variable_scope_name = 'Clustering-'+str(ClusteringLayer.number)
        ClusteringLayer.number+=1
        
    def set_input(self,x):
        
        self.x = x

        with tf.variable_scope(self.variable_scope_name):  
            self.init_Clusters((self.n_clusters,self.x.get_shape().as_list()[1]))
        
    def init_Clusters(self,shape):
        if self.initialize_weights is None: 
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.contrib.layers.xavier_initializer())
        else:
            self.clusters=tf.get_variable('weight',shape=shape,initializer=tf.constant_initializer(self.initialize_weights))
        tf.summary.histogram('weight',self.clusters)
        
    def forward(self):
        if self.x == None:
            print('no input defined')
        else:
            with tf.name_scope('students_t'):
                q = 1.0 / (1.0 + (tf.reduce_sum(tf.square(tf.expand_dims(self.x, axis=1) - self.clusters), axis=2) / self.alpha))
                q **= (self.alpha + 1.0) / 2.0
                q = tf.transpose(tf.transpose(q) / tf.reduce_sum(q, axis=1))
                return q
        
    def target_distribution(self,q):
        weight = tf.square(q) / tf.reduce_sum(q,axis=0)
        return tf.transpose( tf.transpose(weight) /tf.reduce_sum(weight,axis=1))
            
        

    

In [28]:
tf.reset_default_graph()
DenseLayer.number = 0
ConvLayer.number = 0
PoolingLayer.number = 0
EmbeddingLayer.number = 0
RnnLayer.number = 0
SAE.number = 0


sae = SAE(X_train,[500, 500, 2000, 10])
sae.fit(100,256)
sae.extract_feature.shape


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [17:47<00:00, 11.34s/it]


(55000, 10)

In [29]:
#restore the weights i need from the encoder
tf.reset_default_graph()
with tf.Session() as sess:
    saver = tf.train.import_meta_graph('./model/my-test-model.meta')##copies the graph
    sess.run(tf.global_variables_initializer())
    graph = tf.get_default_graph()

#check all variables that exist
#     for i in graph.get_collection('variables'):
#         if 'encoder' in  i.name:
#             weights.append(graph.get_tensor_by_name(i.name).eval())
#             print(i.get_variable_scope

    weights=[]
    for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoder'):
        if 'weight:0' in  i.name:
            weights.append(graph.get_tensor_by_name(i.name).eval())
            print(i)
#     get_operation = graph.get_tensor_by_name('encoder/activations_3/tag')
#     features = sess.run(get_operation)

<tf.Variable 'encoder/Dense-0/weight:0' shape=(784, 500) dtype=float32_ref>
<tf.Variable 'encoder/Dense-1/weight:0' shape=(500, 500) dtype=float32_ref>
<tf.Variable 'encoder/Dense-2/weight:0' shape=(500, 2000) dtype=float32_ref>
<tf.Variable 'encoder/Dense-3/weight:0' shape=(2000, 10) dtype=float32_ref>


In [30]:
def get_batches(data_list, batch_size,shuffle=True):
    """
    Generates a batch generator for a dataset.
    """
    assert isinstance(data_list,list)
    
    size = data_list[0].shape[0]
    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(size))
        data_list = [i[shuffle_indices] for i in data_list]
        
    number_of_batches = int(np.ceil(size/batch_size))
    for i in range(number_of_batches-1):
        index = i*batch_size
        yield [i[index:index+batch_size] for i in data_list]
    yield[i[(number_of_batches-1)*batch_size:] for i in data_list]

In [31]:
from sklearn.cluster import KMeans
tf.reset_default_graph()
DenseLayer.number = 0
ConvLayer.number = 0
PoolingLayer.number = 0
EmbeddingLayer.number = 0
RnnLayer.number = 0
SAE.number = 0
ClusteringLayer.number = 0

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

def target_distribution_np(q):
    weight = np.square(q) / np.sum(q,axis=0)
    return np.transpose( np.transpose(weight) /np.sum(weight,axis=1))

# def target_distribution(q):
#     with tf.name_scope('target_distribution'):
#         weight = tf.square(q) / tf.reduce_sum(q,axis=0)
#         return tf.transpose( tf.transpose(weight) /tf.reduce_sum(weight,axis=1))

p = tf.placeholder(dtype=tf.float32,shape=(None,10))
#create encoder with pretrained weights
encoder = DNN(X_train,y_train,[DenseLayer(500,initialize_weights=weights[0]),DenseLayer(500,initialize_weights=weights[1]),\
                               DenseLayer(2000,initialize_weights=weights[2]),DenseLayer(10,initialize_weights=weights[3],activation_function=None),\
                                ],dtypes=[tf.float32,tf.int32])

# encoder = DNN(X_train,y_train,[DenseLayer(500),DenseLayer(500),\
#                                DenseLayer(2000),DenseLayer(10,activation_function=None),\
#                                 ],dtypes=[tf.float32,tf.int32])

encoder_output = encoder.forward()[-1]
#get encoder features
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    features = sess.run(encoder_output,feed_dict={encoder.X:X_train,encoder.y:y_train})

#get kmean weights to initialize clustering layer
print('initializing kmeans clusters ')
n_clusters=10
km = KMeans(n_clusters, n_init=20)
y_pred = km.fit_predict(features)
kmeans_weights =km.cluster_centers_ 
print(km.cluster_centers_.shape)

clustering = DNN(encoder_output,encoder.y,[ClusteringLayer(10,initialize_weights=kmeans_weights)],as_placeholders=False)
clustering_activations = clustering.forward()
q = clustering_activations[-1]

kld_cost = encoder.kl_divergence(p,q)
tf.summary.scalar('loss',kld_cost)
#optimizer = tf.train.MomentumOptimizer(0.001,0.9)
optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(kld_cost)

merged = tf.summary.merge_all()

num_epochs = 100
batch_size = 256
tol=1e-3
update_rate = 1 # epochs

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    log_name = datetime.now().strftime("%Y%m%d-%H%M%S")
    #train_writer = tf.summary.FileWriter('summaries/train/logs'+log_name,sess.graph)
    test_writer = tf.summary.FileWriter('summaries/test/logs'+log_name,sess.graph)
    
    #training 
    for i in tqdm(range(num_epochs)):
         
        if (i % update_rate == 0): # epochs to update the p_values
            q_value=sess.run(q,feed_dict={encoder.X : X_train})  
            p_value = target_distribution_np(q_value)
            summary,cost=sess.run([merged,kld_cost],feed_dict={encoder.X : X_train,p:p_value})  
            test_writer.add_summary(summary, i)
            batch_train_acc = cluster_acc(y_train,np.argmax(q_value,axis=1).reshape(-1,1))
            print('____________________________________________________')
            print("cost: "+str(cost)+" accuracy: "+ str(batch_train_acc)+' before epoch '+ str(i))
        
        for c,batch in enumerate(get_batches([encoder.X_train,encoder.y_train,p_value],batch_size=batch_size)): 
            batch_x = batch[0]
            batch_y = batch[1]
            batch_p = batch[2]
            
            q_value,cost,_,=sess.run([p,kld_cost,train],feed_dict={encoder.X:batch_x , p:batch_p}) 
            batch_train_acc = cluster_acc(batch_y,np.argmax(q_value,axis=1).reshape(-1,1))
            if c%40==0:
                print("cost: "+str(cost)+"\taccuracy: "+ str(batch_train_acc)+" in " + str(i) + ' epoch in ' +str(c) +'batch')
        


initializing kmeans clusters 
(10, 10)


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

____________________________________________________
cost: 78.497 accuracy: 0.344054545455 before epoch 0
cost: 0.376776	accuracy: 0.34765625 in 0 epoch in 0batch
cost: 1.11894	accuracy: 0.36328125 in 0 epoch in 40batch
cost: 0.247955	accuracy: 0.41796875 in 0 epoch in 80batch
cost: 0.210177	accuracy: 0.359375 in 0 epoch in 120batch
cost: 0.134121	accuracy: 0.35546875 in 0 epoch in 160batch
cost: 0.121728	accuracy: 0.39453125 in 0 epoch in 200batch


  1%|▊                                                                                 | 1/100 [00:10<17:20, 10.51s/it]

____________________________________________________
cost: 291.338 accuracy: 0.351709090909 before epoch 1
cost: 1.28057	accuracy: 0.36328125 in 1 epoch in 0batch
cost: 0.313006	accuracy: 0.3984375 in 1 epoch in 40batch
cost: 0.156145	accuracy: 0.36328125 in 1 epoch in 80batch
cost: 0.101987	accuracy: 0.3671875 in 1 epoch in 120batch
cost: 0.0649016	accuracy: 0.35546875 in 1 epoch in 160batch
cost: 0.0559427	accuracy: 0.3125 in 1 epoch in 200batch


  2%|█▋                                                                                | 2/100 [00:20<17:07, 10.48s/it]

____________________________________________________
cost: 1161.0 accuracy: 0.351454545455 before epoch 2
cost: 5.03241	accuracy: 0.3671875 in 2 epoch in 0batch
cost: 1.67266	accuracy: 0.40234375 in 2 epoch in 40batch
cost: 1.03747	accuracy: 0.38671875 in 2 epoch in 80batch
cost: 0.623268	accuracy: 0.36328125 in 2 epoch in 120batch
cost: 0.611036	accuracy: 0.37109375 in 2 epoch in 160batch
cost: 0.591627	accuracy: 0.40234375 in 2 epoch in 200batch


  3%|██▍                                                                               | 3/100 [00:30<16:34, 10.25s/it]

____________________________________________________
cost: 3887.32 accuracy: 0.352309090909 before epoch 3
cost: 16.3716	accuracy: 0.390625 in 3 epoch in 0batch
cost: 8.75732	accuracy: 0.37890625 in 3 epoch in 40batch
cost: 7.28726	accuracy: 0.37890625 in 3 epoch in 80batch
cost: 5.9159	accuracy: 0.3671875 in 3 epoch in 120batch
cost: 4.71711	accuracy: 0.31640625 in 3 epoch in 160batch
cost: 4.51487	accuracy: 0.390625 in 3 epoch in 200batch


  4%|███▎                                                                              | 4/100 [00:40<16:08, 10.09s/it]

____________________________________________________
cost: 8826.72 accuracy: 0.354472727273 before epoch 4
cost: 41.854	accuracy: 0.36328125 in 4 epoch in 0batch
cost: 35.0545	accuracy: 0.4140625 in 4 epoch in 40batch
cost: 22.7987	accuracy: 0.35546875 in 4 epoch in 80batch
cost: 19.0731	accuracy: 0.31640625 in 4 epoch in 120batch
cost: 17.4796	accuracy: 0.34765625 in 4 epoch in 160batch
cost: 16.2585	accuracy: 0.37109375 in 4 epoch in 200batch


  5%|████                                                                              | 5/100 [00:50<16:06, 10.17s/it]

____________________________________________________
cost: 13643.7 accuracy: 0.357636363636 before epoch 5
cost: 64.721	accuracy: 0.41015625 in 5 epoch in 0batch
cost: 60.3796	accuracy: 0.4140625 in 5 epoch in 40batch
cost: 46.115	accuracy: 0.3671875 in 5 epoch in 80batch
cost: 43.0985	accuracy: 0.39453125 in 5 epoch in 120batch
cost: 38.0995	accuracy: 0.3203125 in 5 epoch in 160batch
cost: 37.9661	accuracy: 0.36328125 in 5 epoch in 200batch


  6%|████▉                                                                             | 6/100 [00:59<15:20,  9.80s/it]

____________________________________________________
cost: 14442.8 accuracy: 0.360854545455 before epoch 6
cost: 68.0132	accuracy: 0.3671875 in 6 epoch in 0batch
cost: 83.1306	accuracy: 0.38671875 in 6 epoch in 40batch
cost: 71.9988	accuracy: 0.37109375 in 6 epoch in 80batch
cost: 70.0924	accuracy: 0.3671875 in 6 epoch in 120batch
cost: 69.8676	accuracy: 0.359375 in 6 epoch in 160batch
cost: 71.6959	accuracy: 0.37109375 in 6 epoch in 200batch


  7%|█████▋                                                                            | 7/100 [01:08<14:51,  9.59s/it]

____________________________________________________
cost: 12991.8 accuracy: 0.367581818182 before epoch 7
cost: 60.5261	accuracy: 0.37109375 in 7 epoch in 0batch
cost: 67.2255	accuracy: 0.375 in 7 epoch in 40batch
cost: 69.9285	accuracy: 0.3515625 in 7 epoch in 80batch
cost: 71.2881	accuracy: 0.3828125 in 7 epoch in 120batch
cost: 69.7127	accuracy: 0.40234375 in 7 epoch in 160batch
cost: 57.3587	accuracy: 0.37109375 in 7 epoch in 200batch


  8%|██████▌                                                                           | 8/100 [01:16<14:04,  9.17s/it]

____________________________________________________
cost: 11643.0 accuracy: 0.369472727273 before epoch 8
cost: 54.2396	accuracy: 0.3828125 in 8 epoch in 0batch
cost: 57.1719	accuracy: 0.36328125 in 8 epoch in 40batch
cost: 60.7234	accuracy: 0.35546875 in 8 epoch in 80batch
cost: 61.4364	accuracy: 0.359375 in 8 epoch in 120batch
cost: 65.5217	accuracy: 0.3515625 in 8 epoch in 160batch
cost: 63.2029	accuracy: 0.39453125 in 8 epoch in 200batch


  9%|███████▍                                                                          | 9/100 [01:25<13:42,  9.04s/it]

____________________________________________________
cost: 10635.2 accuracy: 0.376236363636 before epoch 9
cost: 50.2185	accuracy: 0.3828125 in 9 epoch in 0batch
cost: 59.204	accuracy: 0.3671875 in 9 epoch in 40batch
cost: 63.975	accuracy: 0.3515625 in 9 epoch in 80batch
cost: 54.9664	accuracy: 0.375 in 9 epoch in 120batch
cost: 66.6906	accuracy: 0.40625 in 9 epoch in 160batch
cost: 71.602	accuracy: 0.4296875 in 9 epoch in 200batch


 10%|████████                                                                         | 10/100 [01:33<13:10,  8.78s/it]

____________________________________________________
cost: 9618.59 accuracy: 0.378563636364 before epoch 10
cost: 44.211	accuracy: 0.36328125 in 10 epoch in 0batch
cost: 45.9163	accuracy: 0.37890625 in 10 epoch in 40batch
cost: 49.822	accuracy: 0.37109375 in 10 epoch in 80batch
cost: 58.6606	accuracy: 0.34765625 in 10 epoch in 120batch
cost: 56.4626	accuracy: 0.3984375 in 10 epoch in 160batch
cost: 54.1117	accuracy: 0.35546875 in 10 epoch in 200batch


 11%|████████▉                                                                        | 11/100 [01:41<12:39,  8.54s/it]

____________________________________________________
cost: 8960.1 accuracy: 0.378454545455 before epoch 11
cost: 41.8107	accuracy: 0.40234375 in 11 epoch in 0batch
cost: 50.2793	accuracy: 0.375 in 11 epoch in 40batch
cost: 51.5733	accuracy: 0.40625 in 11 epoch in 80batch
cost: 51.9674	accuracy: 0.421875 in 11 epoch in 120batch
cost: 48.6119	accuracy: 0.390625 in 11 epoch in 160batch
cost: 67.0785	accuracy: 0.40234375 in 11 epoch in 200batch


 12%|█████████▋                                                                       | 12/100 [01:50<12:28,  8.50s/it]

____________________________________________________
cost: 9094.5 accuracy: 0.381509090909 before epoch 12
cost: 41.7874	accuracy: 0.39453125 in 12 epoch in 0batch
cost: 45.5732	accuracy: 0.359375 in 12 epoch in 40batch
cost: 50.4316	accuracy: 0.32421875 in 12 epoch in 80batch
cost: 43.4682	accuracy: 0.375 in 12 epoch in 120batch
cost: 46.0827	accuracy: 0.4296875 in 12 epoch in 160batch
cost: 49.7107	accuracy: 0.3828125 in 12 epoch in 200batch


 13%|██████████▌                                                                      | 13/100 [01:58<12:25,  8.57s/it]

____________________________________________________
cost: 7985.82 accuracy: 0.378854545455 before epoch 13
cost: 38.2092	accuracy: 0.3515625 in 13 epoch in 0batch
cost: 44.1471	accuracy: 0.34765625 in 13 epoch in 40batch
cost: 45.8771	accuracy: 0.3828125 in 13 epoch in 80batch
cost: 47.3451	accuracy: 0.359375 in 13 epoch in 120batch
cost: 46.6446	accuracy: 0.39453125 in 13 epoch in 160batch
cost: 57.7361	accuracy: 0.36328125 in 13 epoch in 200batch


 14%|███████████▎                                                                     | 14/100 [02:07<12:10,  8.49s/it]

____________________________________________________
cost: 7614.63 accuracy: 0.380654545455 before epoch 14
cost: 34.1432	accuracy: 0.375 in 14 epoch in 0batch
cost: 45.0357	accuracy: 0.4296875 in 14 epoch in 40batch
cost: 37.6852	accuracy: 0.3828125 in 14 epoch in 80batch
cost: 42.2776	accuracy: 0.390625 in 14 epoch in 120batch
cost: 46.2355	accuracy: 0.40625 in 14 epoch in 160batch
cost: 50.5099	accuracy: 0.41796875 in 14 epoch in 200batch


 15%|████████████▏                                                                    | 15/100 [02:14<11:38,  8.22s/it]

____________________________________________________
cost: 6926.73 accuracy: 0.383472727273 before epoch 15
cost: 30.9575	accuracy: 0.421875 in 15 epoch in 0batch
cost: 34.3417	accuracy: 0.42578125 in 15 epoch in 40batch
cost: 38.5331	accuracy: 0.39453125 in 15 epoch in 80batch
cost: 43.1647	accuracy: 0.32421875 in 15 epoch in 120batch
cost: 43.1532	accuracy: 0.421875 in 15 epoch in 160batch
cost: 52.5357	accuracy: 0.34765625 in 15 epoch in 200batch


 16%|████████████▉                                                                    | 16/100 [02:22<11:14,  8.03s/it]

____________________________________________________
cost: 6441.25 accuracy: 0.383036363636 before epoch 16
cost: 30.5946	accuracy: 0.34765625 in 16 epoch in 0batch
cost: 34.851	accuracy: 0.4453125 in 16 epoch in 40batch
cost: 40.4497	accuracy: 0.38671875 in 16 epoch in 80batch
cost: 35.2286	accuracy: 0.41015625 in 16 epoch in 120batch
cost: 39.8519	accuracy: 0.36328125 in 16 epoch in 160batch
cost: 38.5481	accuracy: 0.38671875 in 16 epoch in 200batch


 17%|█████████████▊                                                                   | 17/100 [02:30<10:56,  7.91s/it]

____________________________________________________
cost: 6235.88 accuracy: 0.3854 before epoch 17
cost: 27.9617	accuracy: 0.390625 in 17 epoch in 0batch
cost: 32.7074	accuracy: 0.3515625 in 17 epoch in 40batch
cost: 34.8253	accuracy: 0.40234375 in 17 epoch in 80batch
cost: 35.6982	accuracy: 0.39453125 in 17 epoch in 120batch
cost: 36.2417	accuracy: 0.37109375 in 17 epoch in 160batch
cost: 41.4333	accuracy: 0.390625 in 17 epoch in 200batch


 18%|██████████████▌                                                                  | 18/100 [02:37<10:45,  7.87s/it]

____________________________________________________
cost: 5967.67 accuracy: 0.3838 before epoch 18
cost: 27.0609	accuracy: 0.40234375 in 18 epoch in 0batch
cost: 36.0687	accuracy: 0.42578125 in 18 epoch in 40batch
cost: 30.0427	accuracy: 0.4296875 in 18 epoch in 80batch
cost: 35.853	accuracy: 0.390625 in 18 epoch in 120batch
cost: 34.998	accuracy: 0.41015625 in 18 epoch in 160batch
cost: 32.5115	accuracy: 0.38671875 in 18 epoch in 200batch


 19%|███████████████▍                                                                 | 19/100 [02:45<10:31,  7.80s/it]

____________________________________________________
cost: 5527.99 accuracy: 0.385436363636 before epoch 19
cost: 25.408	accuracy: 0.39453125 in 19 epoch in 0batch
cost: 32.8701	accuracy: 0.42578125 in 19 epoch in 40batch
cost: 37.003	accuracy: 0.41796875 in 19 epoch in 80batch
cost: 32.4924	accuracy: 0.390625 in 19 epoch in 120batch
cost: 32.4843	accuracy: 0.37890625 in 19 epoch in 160batch
cost: 43.8349	accuracy: 0.38671875 in 19 epoch in 200batch


 20%|████████████████▏                                                                | 20/100 [02:53<10:21,  7.77s/it]

____________________________________________________
cost: 5368.05 accuracy: 0.384818181818 before epoch 20
cost: 25.9762	accuracy: 0.390625 in 20 epoch in 0batch
cost: 32.7297	accuracy: 0.3828125 in 20 epoch in 40batch
cost: 33.8083	accuracy: 0.33984375 in 20 epoch in 80batch
cost: 31.3423	accuracy: 0.3828125 in 20 epoch in 120batch
cost: 33.0148	accuracy: 0.35546875 in 20 epoch in 160batch
cost: 30.4853	accuracy: 0.41015625 in 20 epoch in 200batch


 21%|█████████████████                                                                | 21/100 [03:00<10:09,  7.71s/it]

____________________________________________________
cost: 5179.21 accuracy: 0.385563636364 before epoch 21
cost: 24.3156	accuracy: 0.35546875 in 21 epoch in 0batch
cost: 26.6707	accuracy: 0.41796875 in 21 epoch in 40batch
cost: 30.1773	accuracy: 0.42578125 in 21 epoch in 80batch
cost: 33.8161	accuracy: 0.41796875 in 21 epoch in 120batch
cost: 40.4866	accuracy: 0.3671875 in 21 epoch in 160batch
cost: 58.4221	accuracy: 0.40234375 in 21 epoch in 200batch


 22%|█████████████████▊                                                               | 22/100 [03:08<10:01,  7.72s/it]

____________________________________________________
cost: 4929.93 accuracy: 0.386 before epoch 22
cost: 22.5398	accuracy: 0.3984375 in 22 epoch in 0batch
cost: 26.7151	accuracy: 0.3984375 in 22 epoch in 40batch
cost: 34.3285	accuracy: 0.36328125 in 22 epoch in 80batch
cost: 37.1245	accuracy: 0.40234375 in 22 epoch in 120batch
cost: 56.2684	accuracy: 0.38671875 in 22 epoch in 160batch
cost: 31.8079	accuracy: 0.4296875 in 22 epoch in 200batch


 23%|██████████████████▋                                                              | 23/100 [03:16<09:52,  7.70s/it]

____________________________________________________
cost: 4676.39 accuracy: 0.385181818182 before epoch 23
cost: 20.627	accuracy: 0.421875 in 23 epoch in 0batch
cost: 26.6628	accuracy: 0.3515625 in 23 epoch in 40batch
cost: 27.4914	accuracy: 0.40234375 in 23 epoch in 80batch
cost: 26.7324	accuracy: 0.390625 in 23 epoch in 120batch
cost: 34.6223	accuracy: 0.4296875 in 23 epoch in 160batch
cost: 38.7439	accuracy: 0.41796875 in 23 epoch in 200batch


 24%|███████████████████▍                                                             | 24/100 [03:23<09:42,  7.67s/it]

____________________________________________________
cost: 4556.28 accuracy: 0.386127272727 before epoch 24
cost: 20.9881	accuracy: 0.359375 in 24 epoch in 0batch
cost: 27.4302	accuracy: 0.40234375 in 24 epoch in 40batch
cost: 23.1692	accuracy: 0.4140625 in 24 epoch in 80batch
cost: 26.7207	accuracy: 0.390625 in 24 epoch in 120batch
cost: 28.2836	accuracy: 0.421875 in 24 epoch in 160batch
cost: 26.1527	accuracy: 0.421875 in 24 epoch in 200batch


 25%|████████████████████▎                                                            | 25/100 [03:31<09:32,  7.63s/it]

____________________________________________________
cost: 4377.84 accuracy: 0.385454545455 before epoch 25
cost: 20.9987	accuracy: 0.36328125 in 25 epoch in 0batch
cost: 26.6044	accuracy: 0.37890625 in 25 epoch in 40batch
cost: 26.4803	accuracy: 0.41796875 in 25 epoch in 80batch
cost: 30.3959	accuracy: 0.3828125 in 25 epoch in 120batch
cost: 26.7584	accuracy: 0.40234375 in 25 epoch in 160batch
cost: 27.1888	accuracy: 0.36328125 in 25 epoch in 200batch


 26%|█████████████████████                                                            | 26/100 [03:39<09:33,  7.75s/it]

____________________________________________________
cost: 4224.08 accuracy: 0.385836363636 before epoch 26
cost: 19.5863	accuracy: 0.359375 in 26 epoch in 0batch
cost: 22.7249	accuracy: 0.375 in 26 epoch in 40batch
cost: 26.5548	accuracy: 0.390625 in 26 epoch in 80batch
cost: 28.6143	accuracy: 0.3984375 in 26 epoch in 120batch
cost: 35.5967	accuracy: 0.390625 in 26 epoch in 160batch
cost: 26.5032	accuracy: 0.3828125 in 26 epoch in 200batch


 27%|█████████████████████▊                                                           | 27/100 [03:48<09:46,  8.04s/it]

____________________________________________________
cost: 3980.47 accuracy: 0.385 before epoch 27
cost: 19.9055	accuracy: 0.4140625 in 27 epoch in 0batch
cost: 21.5898	accuracy: 0.3515625 in 27 epoch in 40batch
cost: 25.4789	accuracy: 0.375 in 27 epoch in 80batch
cost: 18.9743	accuracy: 0.42578125 in 27 epoch in 120batch
cost: 25.641	accuracy: 0.41015625 in 27 epoch in 160batch
cost: 27.7258	accuracy: 0.4296875 in 27 epoch in 200batch


 28%|██████████████████████▋                                                          | 28/100 [03:56<09:53,  8.24s/it]

____________________________________________________
cost: 3816.34 accuracy: 0.384236363636 before epoch 28
cost: 17.5199	accuracy: 0.39453125 in 28 epoch in 0batch
cost: 18.1002	accuracy: 0.38671875 in 28 epoch in 40batch
cost: 30.8784	accuracy: 0.34765625 in 28 epoch in 80batch
cost: 22.3929	accuracy: 0.39453125 in 28 epoch in 120batch
cost: 24.2773	accuracy: 0.3671875 in 28 epoch in 160batch
cost: 20.5871	accuracy: 0.42578125 in 28 epoch in 200batch


 29%|███████████████████████▍                                                         | 29/100 [04:05<09:53,  8.36s/it]

____________________________________________________
cost: 3786.26 accuracy: 0.3834 before epoch 29
cost: 17.6179	accuracy: 0.3984375 in 29 epoch in 0batch
cost: 26.4755	accuracy: 0.36328125 in 29 epoch in 40batch
cost: 23.828	accuracy: 0.3984375 in 29 epoch in 80batch
cost: 24.9175	accuracy: 0.359375 in 29 epoch in 120batch
cost: 23.862	accuracy: 0.3359375 in 29 epoch in 160batch
cost: 18.7643	accuracy: 0.37890625 in 29 epoch in 200batch


 30%|████████████████████████▎                                                        | 30/100 [04:13<09:48,  8.41s/it]

____________________________________________________
cost: 3607.68 accuracy: 0.383563636364 before epoch 30
cost: 15.7776	accuracy: 0.37890625 in 30 epoch in 0batch
cost: 23.1995	accuracy: 0.36328125 in 30 epoch in 40batch
cost: 26.6779	accuracy: 0.35546875 in 30 epoch in 80batch
cost: 29.7114	accuracy: 0.375 in 30 epoch in 120batch
cost: 22.5051	accuracy: 0.36328125 in 30 epoch in 160batch
cost: 20.7003	accuracy: 0.3984375 in 30 epoch in 200batch


 31%|█████████████████████████                                                        | 31/100 [04:22<09:36,  8.35s/it]

____________________________________________________
cost: 3368.77 accuracy: 0.383236363636 before epoch 31
cost: 16.7149	accuracy: 0.4375 in 31 epoch in 0batch
cost: 18.6392	accuracy: 0.37890625 in 31 epoch in 40batch
cost: 16.735	accuracy: 0.40234375 in 31 epoch in 80batch
cost: 21.9698	accuracy: 0.40625 in 31 epoch in 120batch
cost: 23.897	accuracy: 0.453125 in 31 epoch in 160batch
cost: 23.0756	accuracy: 0.37890625 in 31 epoch in 200batch


 32%|█████████████████████████▉                                                       | 32/100 [04:30<09:22,  8.27s/it]

____________________________________________________
cost: 3432.56 accuracy: 0.384363636364 before epoch 32
cost: 16.648	accuracy: 0.39453125 in 32 epoch in 0batch
cost: 17.664	accuracy: 0.38671875 in 32 epoch in 40batch
cost: 19.5674	accuracy: 0.4375 in 32 epoch in 80batch
cost: 32.0048	accuracy: 0.421875 in 32 epoch in 120batch
cost: 28.5406	accuracy: 0.484375 in 32 epoch in 160batch
cost: 27.0379	accuracy: 0.34375 in 32 epoch in 200batch


 33%|██████████████████████████▋                                                      | 33/100 [04:38<09:08,  8.19s/it]

____________________________________________________
cost: 3409.86 accuracy: 0.382418181818 before epoch 33
cost: 15.0177	accuracy: 0.30859375 in 33 epoch in 0batch
cost: 22.3478	accuracy: 0.33984375 in 33 epoch in 40batch
cost: 31.7097	accuracy: 0.40234375 in 33 epoch in 80batch
cost: 22.0299	accuracy: 0.390625 in 33 epoch in 120batch
cost: 22.5429	accuracy: 0.39453125 in 33 epoch in 160batch
cost: 20.4211	accuracy: 0.38671875 in 33 epoch in 200batch


 34%|███████████████████████████▌                                                     | 34/100 [04:46<08:53,  8.08s/it]

____________________________________________________
cost: 3235.63 accuracy: 0.382781818182 before epoch 34
cost: 13.7857	accuracy: 0.421875 in 34 epoch in 0batch
cost: 14.8521	accuracy: 0.3984375 in 34 epoch in 40batch
cost: 30.9397	accuracy: 0.37109375 in 34 epoch in 80batch
cost: 14.1613	accuracy: 0.42578125 in 34 epoch in 120batch
cost: 20.1652	accuracy: 0.33203125 in 34 epoch in 160batch
cost: 19.6694	accuracy: 0.4140625 in 34 epoch in 200batch


 35%|████████████████████████████▎                                                    | 35/100 [04:53<08:32,  7.89s/it]

____________________________________________________
cost: 3176.23 accuracy: 0.382018181818 before epoch 35
cost: 14.3146	accuracy: 0.4453125 in 35 epoch in 0batch
cost: 22.7267	accuracy: 0.40234375 in 35 epoch in 40batch
cost: 18.5655	accuracy: 0.375 in 35 epoch in 80batch
cost: 22.7033	accuracy: 0.40234375 in 35 epoch in 120batch
cost: 28.3378	accuracy: 0.38671875 in 35 epoch in 160batch
cost: 21.1939	accuracy: 0.37109375 in 35 epoch in 200batch


 36%|█████████████████████████████▏                                                   | 36/100 [05:02<08:44,  8.20s/it]

____________________________________________________
cost: 3136.36 accuracy: 0.382181818182 before epoch 36
cost: 14.346	accuracy: 0.40234375 in 36 epoch in 0batch
cost: 18.4527	accuracy: 0.40234375 in 36 epoch in 40batch
cost: 20.5482	accuracy: 0.41796875 in 36 epoch in 80batch
cost: 16.4631	accuracy: 0.40625 in 36 epoch in 120batch
cost: 15.6286	accuracy: 0.34765625 in 36 epoch in 160batch
cost: 29.1541	accuracy: 0.3828125 in 36 epoch in 200batch


 37%|█████████████████████████████▉                                                   | 37/100 [05:10<08:34,  8.17s/it]

____________________________________________________
cost: 3176.13 accuracy: 0.381963636364 before epoch 37
cost: 15.5417	accuracy: 0.37109375 in 37 epoch in 0batch
cost: 21.7744	accuracy: 0.41796875 in 37 epoch in 40batch
cost: 20.9634	accuracy: 0.33203125 in 37 epoch in 80batch
cost: 27.6975	accuracy: 0.375 in 37 epoch in 120batch
cost: 23.6875	accuracy: 0.39453125 in 37 epoch in 160batch
cost: 26.324	accuracy: 0.34375 in 37 epoch in 200batch


 38%|██████████████████████████████▊                                                  | 38/100 [05:18<08:18,  8.05s/it]

____________________________________________________
cost: 3046.6 accuracy: 0.382854545455 before epoch 38
cost: 14.0589	accuracy: 0.3984375 in 38 epoch in 0batch
cost: 16.8591	accuracy: 0.375 in 38 epoch in 40batch
cost: 29.2445	accuracy: 0.36328125 in 38 epoch in 80batch
cost: 25.3607	accuracy: 0.390625 in 38 epoch in 120batch
cost: 24.7518	accuracy: 0.38671875 in 38 epoch in 160batch
cost: 23.1664	accuracy: 0.38671875 in 38 epoch in 200batch


 39%|███████████████████████████████▌                                                 | 39/100 [05:25<08:02,  7.91s/it]

____________________________________________________
cost: 2887.42 accuracy: 0.383327272727 before epoch 39
cost: 13.6653	accuracy: 0.390625 in 39 epoch in 0batch
cost: 16.7689	accuracy: 0.3046875 in 39 epoch in 40batch
cost: 16.1992	accuracy: 0.39453125 in 39 epoch in 80batch
cost: 18.221	accuracy: 0.3828125 in 39 epoch in 120batch
cost: 33.5329	accuracy: 0.4140625 in 39 epoch in 160batch
cost: 22.0527	accuracy: 0.43359375 in 39 epoch in 200batch


 40%|████████████████████████████████▍                                                | 40/100 [05:34<08:08,  8.13s/it]

____________________________________________________
cost: 2754.68 accuracy: 0.383381818182 before epoch 40
cost: 12.7671	accuracy: 0.390625 in 40 epoch in 0batch
cost: 16.503	accuracy: 0.37890625 in 40 epoch in 40batch
cost: 16.8812	accuracy: 0.40234375 in 40 epoch in 80batch
cost: 16.8969	accuracy: 0.40625 in 40 epoch in 120batch
cost: 19.3298	accuracy: 0.4140625 in 40 epoch in 160batch
cost: 26.0748	accuracy: 0.375 in 40 epoch in 200batch


 41%|█████████████████████████████████▏                                               | 41/100 [05:42<07:50,  7.97s/it]

____________________________________________________
cost: 2770.5 accuracy: 0.384472727273 before epoch 41
cost: 12.9892	accuracy: 0.36328125 in 41 epoch in 0batch
cost: 20.2388	accuracy: 0.3984375 in 41 epoch in 40batch
cost: 24.6568	accuracy: 0.3828125 in 41 epoch in 80batch
cost: 33.3206	accuracy: 0.359375 in 41 epoch in 120batch
cost: 20.0435	accuracy: 0.40625 in 41 epoch in 160batch
cost: 22.264	accuracy: 0.35546875 in 41 epoch in 200batch


 42%|██████████████████████████████████                                               | 42/100 [05:49<07:31,  7.79s/it]

____________________________________________________
cost: 2675.59 accuracy: 0.383909090909 before epoch 42
cost: 13.65	accuracy: 0.37109375 in 42 epoch in 0batch
cost: 16.6945	accuracy: 0.390625 in 42 epoch in 40batch
cost: 20.7402	accuracy: 0.3828125 in 42 epoch in 80batch
cost: 20.0978	accuracy: 0.40234375 in 42 epoch in 120batch
cost: 19.067	accuracy: 0.4140625 in 42 epoch in 160batch
cost: 15.9865	accuracy: 0.3984375 in 42 epoch in 200batch


 43%|██████████████████████████████████▊                                              | 43/100 [05:56<07:16,  7.66s/it]

____________________________________________________
cost: 2765.32 accuracy: 0.383381818182 before epoch 43
cost: 13.2231	accuracy: 0.38671875 in 43 epoch in 0batch
cost: 16.7749	accuracy: 0.38671875 in 43 epoch in 40batch
cost: 13.3584	accuracy: 0.4296875 in 43 epoch in 80batch
cost: 19.302	accuracy: 0.3828125 in 43 epoch in 120batch
cost: 14.9556	accuracy: 0.40625 in 43 epoch in 160batch
cost: 23.9777	accuracy: 0.40625 in 43 epoch in 200batch


 44%|███████████████████████████████████▋                                             | 44/100 [06:04<07:15,  7.79s/it]

____________________________________________________
cost: 2580.31 accuracy: 0.382490909091 before epoch 44
cost: 12.0258	accuracy: 0.37890625 in 44 epoch in 0batch
cost: 16.1983	accuracy: 0.37890625 in 44 epoch in 40batch
cost: 25.1582	accuracy: 0.30859375 in 44 epoch in 80batch
cost: 17.5252	accuracy: 0.38671875 in 44 epoch in 120batch
cost: 15.138	accuracy: 0.375 in 44 epoch in 160batch
cost: 16.8601	accuracy: 0.3984375 in 44 epoch in 200batch


 45%|████████████████████████████████████▍                                            | 45/100 [06:13<07:18,  7.97s/it]

____________________________________________________
cost: 2451.04 accuracy: 0.383890909091 before epoch 45
cost: 11.6974	accuracy: 0.359375 in 45 epoch in 0batch
cost: 12.3704	accuracy: 0.39453125 in 45 epoch in 40batch
cost: 19.2348	accuracy: 0.40625 in 45 epoch in 80batch
cost: 15.9777	accuracy: 0.37890625 in 45 epoch in 120batch
cost: 18.0801	accuracy: 0.44140625 in 45 epoch in 160batch
cost: 17.3395	accuracy: 0.38671875 in 45 epoch in 200batch


 46%|█████████████████████████████████████▎                                           | 46/100 [06:20<07:02,  7.83s/it]

____________________________________________________
cost: 2425.66 accuracy: 0.384509090909 before epoch 46
cost: 10.9969	accuracy: 0.3671875 in 46 epoch in 0batch
cost: 14.5705	accuracy: 0.3828125 in 46 epoch in 40batch
cost: 14.8868	accuracy: 0.41015625 in 46 epoch in 80batch
cost: 19.0547	accuracy: 0.37890625 in 46 epoch in 120batch
cost: 25.9722	accuracy: 0.41015625 in 46 epoch in 160batch
cost: 28.4033	accuracy: 0.35546875 in 46 epoch in 200batch


 47%|██████████████████████████████████████                                           | 47/100 [06:28<06:52,  7.79s/it]

____________________________________________________
cost: 2565.06 accuracy: 0.385 before epoch 47
cost: 12.811	accuracy: 0.39453125 in 47 epoch in 0batch
cost: 12.2414	accuracy: 0.40625 in 47 epoch in 40batch
cost: 24.6838	accuracy: 0.3984375 in 47 epoch in 80batch
cost: 17.1382	accuracy: 0.4140625 in 47 epoch in 120batch
cost: 18.8505	accuracy: 0.421875 in 47 epoch in 160batch
cost: 14.0653	accuracy: 0.375 in 47 epoch in 200batch


 48%|██████████████████████████████████████▉                                          | 48/100 [06:35<06:38,  7.65s/it]

____________________________________________________
cost: 2488.83 accuracy: 0.385327272727 before epoch 48
cost: 10.6228	accuracy: 0.37890625 in 48 epoch in 0batch
cost: 11.5684	accuracy: 0.41796875 in 48 epoch in 40batch
cost: 22.3311	accuracy: 0.34375 in 48 epoch in 80batch
cost: 19.0478	accuracy: 0.3828125 in 48 epoch in 120batch
cost: 15.9059	accuracy: 0.37890625 in 48 epoch in 160batch
cost: 12.9548	accuracy: 0.40234375 in 48 epoch in 200batch


 49%|███████████████████████████████████████▋                                         | 49/100 [06:43<06:31,  7.68s/it]

____________________________________________________
cost: 2296.64 accuracy: 0.385236363636 before epoch 49
cost: 10.262	accuracy: 0.37109375 in 49 epoch in 0batch
cost: 11.7045	accuracy: 0.39453125 in 49 epoch in 40batch
cost: 12.1584	accuracy: 0.41015625 in 49 epoch in 80batch
cost: 17.3446	accuracy: 0.34375 in 49 epoch in 120batch
cost: 17.9015	accuracy: 0.34375 in 49 epoch in 160batch
cost: 16.9614	accuracy: 0.40625 in 49 epoch in 200batch


 50%|████████████████████████████████████████▌                                        | 50/100 [06:51<06:23,  7.67s/it]

____________________________________________________
cost: 2417.17 accuracy: 0.384290909091 before epoch 50
cost: 11.2649	accuracy: 0.3984375 in 50 epoch in 0batch
cost: 11.1776	accuracy: 0.39453125 in 50 epoch in 40batch
cost: 18.4788	accuracy: 0.35546875 in 50 epoch in 80batch
cost: 19.1841	accuracy: 0.390625 in 50 epoch in 120batch
cost: 15.5759	accuracy: 0.4375 in 50 epoch in 160batch
cost: 14.8694	accuracy: 0.3828125 in 50 epoch in 200batch


 51%|█████████████████████████████████████████▎                                       | 51/100 [06:59<06:19,  7.74s/it]

____________________________________________________
cost: 2391.48 accuracy: 0.384672727273 before epoch 51
cost: 10.3737	accuracy: 0.4140625 in 51 epoch in 0batch
cost: 17.2457	accuracy: 0.40625 in 51 epoch in 40batch
cost: 21.1969	accuracy: 0.37109375 in 51 epoch in 80batch
cost: 9.85097	accuracy: 0.35546875 in 51 epoch in 120batch
cost: 14.1374	accuracy: 0.40234375 in 51 epoch in 160batch
cost: 19.9701	accuracy: 0.453125 in 51 epoch in 200batch


 52%|██████████████████████████████████████████                                       | 52/100 [07:06<06:07,  7.65s/it]

____________________________________________________
cost: 2320.55 accuracy: 0.384545454545 before epoch 52
cost: 10.1493	accuracy: 0.41015625 in 52 epoch in 0batch
cost: 18.3037	accuracy: 0.40625 in 52 epoch in 40batch
cost: 18.4875	accuracy: 0.40234375 in 52 epoch in 80batch
cost: 23.3262	accuracy: 0.4453125 in 52 epoch in 120batch
cost: 18.8635	accuracy: 0.40234375 in 52 epoch in 160batch
cost: 15.2467	accuracy: 0.4765625 in 52 epoch in 200batch


 53%|██████████████████████████████████████████▉                                      | 53/100 [07:13<05:55,  7.57s/it]

____________________________________________________
cost: 2399.53 accuracy: 0.384072727273 before epoch 53
cost: 10.8372	accuracy: 0.41015625 in 53 epoch in 0batch
cost: 12.2986	accuracy: 0.42578125 in 53 epoch in 40batch
cost: 26.9222	accuracy: 0.42578125 in 53 epoch in 80batch
cost: 16.353	accuracy: 0.4296875 in 53 epoch in 120batch
cost: 17.0958	accuracy: 0.3984375 in 53 epoch in 160batch
cost: 21.7607	accuracy: 0.390625 in 53 epoch in 200batch


 54%|███████████████████████████████████████████▋                                     | 54/100 [07:21<05:44,  7.48s/it]

____________________________________________________
cost: 2310.99 accuracy: 0.383636363636 before epoch 54
cost: 10.4337	accuracy: 0.38671875 in 54 epoch in 0batch
cost: 15.3584	accuracy: 0.37109375 in 54 epoch in 40batch
cost: 22.4595	accuracy: 0.39453125 in 54 epoch in 80batch
cost: 15.074	accuracy: 0.41015625 in 54 epoch in 120batch
cost: 13.9486	accuracy: 0.3828125 in 54 epoch in 160batch
cost: 20.1575	accuracy: 0.38671875 in 54 epoch in 200batch


 55%|████████████████████████████████████████████▌                                    | 55/100 [07:28<05:31,  7.37s/it]

____________________________________________________
cost: 2243.38 accuracy: 0.3848 before epoch 55
cost: 10.4357	accuracy: 0.41015625 in 55 epoch in 0batch
cost: 12.8238	accuracy: 0.390625 in 55 epoch in 40batch
cost: 14.0959	accuracy: 0.34765625 in 55 epoch in 80batch
cost: 13.0931	accuracy: 0.40625 in 55 epoch in 120batch
cost: 16.4312	accuracy: 0.40625 in 55 epoch in 160batch
cost: 21.8387	accuracy: 0.40625 in 55 epoch in 200batch


 56%|█████████████████████████████████████████████▎                                   | 56/100 [07:35<05:20,  7.28s/it]

____________________________________________________
cost: 2276.91 accuracy: 0.384254545455 before epoch 56
cost: 9.81225	accuracy: 0.421875 in 56 epoch in 0batch
cost: 8.86238	accuracy: 0.4296875 in 56 epoch in 40batch
cost: 17.9094	accuracy: 0.37109375 in 56 epoch in 80batch
cost: 14.5877	accuracy: 0.4375 in 56 epoch in 120batch
cost: 13.5733	accuracy: 0.4296875 in 56 epoch in 160batch
cost: 16.1657	accuracy: 0.40625 in 56 epoch in 200batch


 57%|██████████████████████████████████████████████▏                                  | 57/100 [07:42<05:12,  7.26s/it]

____________________________________________________
cost: 2159.6 accuracy: 0.386490909091 before epoch 57
cost: 10.6608	accuracy: 0.4296875 in 57 epoch in 0batch
cost: 10.8332	accuracy: 0.3984375 in 57 epoch in 40batch
cost: 13.2887	accuracy: 0.41796875 in 57 epoch in 80batch
cost: 8.75957	accuracy: 0.38671875 in 57 epoch in 120batch
cost: 11.8836	accuracy: 0.39453125 in 57 epoch in 160batch
cost: 18.2954	accuracy: 0.40234375 in 57 epoch in 200batch


 58%|██████████████████████████████████████████████▉                                  | 58/100 [07:50<05:08,  7.35s/it]

____________________________________________________
cost: 2212.1 accuracy: 0.387072727273 before epoch 58
cost: 11.3591	accuracy: 0.37890625 in 58 epoch in 0batch
cost: 12.4017	accuracy: 0.4140625 in 58 epoch in 40batch
cost: 19.7974	accuracy: 0.38671875 in 58 epoch in 80batch
cost: 13.9231	accuracy: 0.4140625 in 58 epoch in 120batch
cost: 15.8547	accuracy: 0.38671875 in 58 epoch in 160batch
cost: 16.4877	accuracy: 0.39453125 in 58 epoch in 200batch


 59%|███████████████████████████████████████████████▊                                 | 59/100 [07:57<04:58,  7.27s/it]

____________________________________________________
cost: 2124.42 accuracy: 0.386072727273 before epoch 59
cost: 11.4953	accuracy: 0.34765625 in 59 epoch in 0batch
cost: 13.1763	accuracy: 0.3984375 in 59 epoch in 40batch
cost: 14.0974	accuracy: 0.40234375 in 59 epoch in 80batch
cost: 15.0843	accuracy: 0.3984375 in 59 epoch in 120batch
cost: 17.9579	accuracy: 0.39453125 in 59 epoch in 160batch
cost: 24.5057	accuracy: 0.39453125 in 59 epoch in 200batch


 60%|████████████████████████████████████████████████▌                                | 60/100 [08:04<04:48,  7.21s/it]

____________________________________________________
cost: 2176.52 accuracy: 0.387563636364 before epoch 60
cost: 8.25152	accuracy: 0.390625 in 60 epoch in 0batch
cost: 12.1028	accuracy: 0.421875 in 60 epoch in 40batch
cost: 18.3082	accuracy: 0.41015625 in 60 epoch in 80batch
cost: 14.3134	accuracy: 0.37890625 in 60 epoch in 120batch
cost: 12.2288	accuracy: 0.37890625 in 60 epoch in 160batch
cost: 23.6544	accuracy: 0.3671875 in 60 epoch in 200batch


 61%|█████████████████████████████████████████████████▍                               | 61/100 [08:11<04:35,  7.07s/it]

____________________________________________________
cost: 2147.97 accuracy: 0.387236363636 before epoch 61
cost: 9.0607	accuracy: 0.41796875 in 61 epoch in 0batch
cost: 13.0077	accuracy: 0.4140625 in 61 epoch in 40batch
cost: 10.2132	accuracy: 0.359375 in 61 epoch in 80batch
cost: 11.8365	accuracy: 0.35546875 in 61 epoch in 120batch
cost: 14.6486	accuracy: 0.41796875 in 61 epoch in 160batch
cost: 16.0835	accuracy: 0.3828125 in 61 epoch in 200batch


 62%|██████████████████████████████████████████████████▏                              | 62/100 [08:18<04:31,  7.16s/it]

____________________________________________________
cost: 1988.62 accuracy: 0.387090909091 before epoch 62
cost: 8.65158	accuracy: 0.3515625 in 62 epoch in 0batch
cost: 11.2576	accuracy: 0.3984375 in 62 epoch in 40batch
cost: 11.4545	accuracy: 0.390625 in 62 epoch in 80batch
cost: 16.9316	accuracy: 0.39453125 in 62 epoch in 120batch
cost: 11.9586	accuracy: 0.4140625 in 62 epoch in 160batch
cost: 12.7386	accuracy: 0.4609375 in 62 epoch in 200batch


 63%|███████████████████████████████████████████████████                              | 63/100 [08:25<04:24,  7.14s/it]

____________________________________________________
cost: 1886.74 accuracy: 0.387127272727 before epoch 63
cost: 8.9577	accuracy: 0.41796875 in 63 epoch in 0batch
cost: 9.3376	accuracy: 0.40234375 in 63 epoch in 40batch
cost: 17.3563	accuracy: 0.39453125 in 63 epoch in 80batch
cost: 11.0546	accuracy: 0.41015625 in 63 epoch in 120batch
cost: 18.3809	accuracy: 0.3828125 in 63 epoch in 160batch
cost: 18.1065	accuracy: 0.38671875 in 63 epoch in 200batch


 64%|███████████████████████████████████████████████████▊                             | 64/100 [08:32<04:15,  7.11s/it]

____________________________________________________
cost: 2018.84 accuracy: 0.387272727273 before epoch 64
cost: 9.01567	accuracy: 0.3828125 in 64 epoch in 0batch
cost: 18.7216	accuracy: 0.39453125 in 64 epoch in 40batch
cost: 10.7184	accuracy: 0.41796875 in 64 epoch in 80batch
cost: 19.6445	accuracy: 0.37890625 in 64 epoch in 120batch
cost: 24.0543	accuracy: 0.37109375 in 64 epoch in 160batch
cost: 25.4444	accuracy: 0.4609375 in 64 epoch in 200batch


 65%|████████████████████████████████████████████████████▋                            | 65/100 [08:39<04:09,  7.12s/it]

____________________________________________________
cost: 2052.43 accuracy: 0.387181818182 before epoch 65
cost: 11.0404	accuracy: 0.40234375 in 65 epoch in 0batch
cost: 17.5251	accuracy: 0.37109375 in 65 epoch in 40batch
cost: 11.7282	accuracy: 0.390625 in 65 epoch in 80batch
cost: 19.0427	accuracy: 0.41796875 in 65 epoch in 120batch
cost: 14.2108	accuracy: 0.3828125 in 65 epoch in 160batch
cost: 19.0981	accuracy: 0.40625 in 65 epoch in 200batch


 66%|█████████████████████████████████████████████████████▍                           | 66/100 [08:46<04:03,  7.15s/it]

____________________________________________________
cost: 2101.67 accuracy: 0.387 before epoch 66
cost: 8.62587	accuracy: 0.3828125 in 66 epoch in 0batch
cost: 11.4712	accuracy: 0.4296875 in 66 epoch in 40batch
cost: 13.2299	accuracy: 0.36328125 in 66 epoch in 80batch
cost: 11.9879	accuracy: 0.41015625 in 66 epoch in 120batch
cost: 16.391	accuracy: 0.3984375 in 66 epoch in 160batch
cost: 16.3891	accuracy: 0.359375 in 66 epoch in 200batch


 67%|██████████████████████████████████████████████████████▎                          | 67/100 [08:55<04:05,  7.43s/it]

____________________________________________________
cost: 1924.52 accuracy: 0.386709090909 before epoch 67
cost: 7.56355	accuracy: 0.4296875 in 67 epoch in 0batch
cost: 15.6084	accuracy: 0.4140625 in 67 epoch in 40batch
cost: 17.1251	accuracy: 0.37109375 in 67 epoch in 80batch
cost: 15.5384	accuracy: 0.375 in 67 epoch in 120batch
cost: 20.4447	accuracy: 0.3203125 in 67 epoch in 160batch
cost: 18.2014	accuracy: 0.3671875 in 67 epoch in 200batch


 68%|███████████████████████████████████████████████████████                          | 68/100 [09:02<04:02,  7.58s/it]

____________________________________________________
cost: 1925.14 accuracy: 0.386981818182 before epoch 68
cost: 9.08316	accuracy: 0.421875 in 68 epoch in 0batch
cost: 8.8077	accuracy: 0.3671875 in 68 epoch in 40batch
cost: 23.6096	accuracy: 0.41015625 in 68 epoch in 80batch
cost: 13.2507	accuracy: 0.42578125 in 68 epoch in 120batch
cost: 23.439	accuracy: 0.37890625 in 68 epoch in 160batch
cost: 17.3839	accuracy: 0.421875 in 68 epoch in 200batch


 69%|███████████████████████████████████████████████████████▉                         | 69/100 [09:10<03:54,  7.56s/it]

____________________________________________________
cost: 2063.4 accuracy: 0.387236363636 before epoch 69
cost: 8.74313	accuracy: 0.359375 in 69 epoch in 0batch
cost: 20.104	accuracy: 0.35546875 in 69 epoch in 40batch
cost: 11.8239	accuracy: 0.390625 in 69 epoch in 80batch
cost: 14.0506	accuracy: 0.37890625 in 69 epoch in 120batch
cost: 13.0687	accuracy: 0.40625 in 69 epoch in 160batch
cost: 14.7166	accuracy: 0.36328125 in 69 epoch in 200batch


 70%|████████████████████████████████████████████████████████▋                        | 70/100 [09:19<03:56,  7.89s/it]

____________________________________________________
cost: 2017.66 accuracy: 0.387436363636 before epoch 70
cost: 10.733	accuracy: 0.40625 in 70 epoch in 0batch
cost: 9.11907	accuracy: 0.421875 in 70 epoch in 40batch
cost: 13.4205	accuracy: 0.3828125 in 70 epoch in 80batch
cost: 17.0088	accuracy: 0.41015625 in 70 epoch in 120batch
cost: 15.4575	accuracy: 0.44921875 in 70 epoch in 160batch
cost: 18.3488	accuracy: 0.4609375 in 70 epoch in 200batch


 71%|█████████████████████████████████████████████████████████▌                       | 71/100 [09:28<04:03,  8.39s/it]

____________________________________________________
cost: 2057.0 accuracy: 0.386945454545 before epoch 71
cost: 10.209	accuracy: 0.47265625 in 71 epoch in 0batch
cost: 12.5277	accuracy: 0.421875 in 71 epoch in 40batch
cost: 17.7371	accuracy: 0.34765625 in 71 epoch in 80batch
cost: 14.1025	accuracy: 0.3671875 in 71 epoch in 120batch
cost: 19.7993	accuracy: 0.40625 in 71 epoch in 160batch
cost: 13.1905	accuracy: 0.46875 in 71 epoch in 200batch


 72%|██████████████████████████████████████████████████████████▎                      | 72/100 [09:37<03:59,  8.56s/it]

____________________________________________________
cost: 2099.1 accuracy: 0.388236363636 before epoch 72
cost: 10.1785	accuracy: 0.3515625 in 72 epoch in 0batch
cost: 14.3699	accuracy: 0.40625 in 72 epoch in 40batch
cost: 10.6671	accuracy: 0.421875 in 72 epoch in 80batch
cost: 15.7113	accuracy: 0.40625 in 72 epoch in 120batch
cost: 20.538	accuracy: 0.375 in 72 epoch in 160batch
cost: 21.4712	accuracy: 0.39453125 in 72 epoch in 200batch


 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [09:45<03:46,  8.37s/it]

____________________________________________________
cost: 1885.26 accuracy: 0.388163636364 before epoch 73
cost: 7.76826	accuracy: 0.41015625 in 73 epoch in 0batch
cost: 9.33412	accuracy: 0.3828125 in 73 epoch in 40batch
cost: 15.101	accuracy: 0.38671875 in 73 epoch in 80batch
cost: 15.8541	accuracy: 0.41796875 in 73 epoch in 120batch
cost: 22.8358	accuracy: 0.37890625 in 73 epoch in 160batch
cost: 23.6407	accuracy: 0.39453125 in 73 epoch in 200batch


 74%|███████████████████████████████████████████████████████████▉                     | 74/100 [09:53<03:32,  8.18s/it]

____________________________________________________
cost: 1902.75 accuracy: 0.387563636364 before epoch 74
cost: 8.01056	accuracy: 0.40625 in 74 epoch in 0batch
cost: 10.0456	accuracy: 0.4296875 in 74 epoch in 40batch
cost: 12.5438	accuracy: 0.4453125 in 74 epoch in 80batch
cost: 12.9568	accuracy: 0.40625 in 74 epoch in 120batch
cost: 11.5894	accuracy: 0.37890625 in 74 epoch in 160batch
cost: 13.5293	accuracy: 0.44140625 in 74 epoch in 200batch


 75%|████████████████████████████████████████████████████████████▊                    | 75/100 [10:00<03:16,  7.87s/it]

____________________________________________________
cost: 1752.79 accuracy: 0.388036363636 before epoch 75
cost: 7.56305	accuracy: 0.34375 in 75 epoch in 0batch
cost: 9.54126	accuracy: 0.390625 in 75 epoch in 40batch
cost: 14.1258	accuracy: 0.42578125 in 75 epoch in 80batch
cost: 14.5964	accuracy: 0.3828125 in 75 epoch in 120batch
cost: 16.2199	accuracy: 0.3828125 in 75 epoch in 160batch
cost: 21.171	accuracy: 0.34765625 in 75 epoch in 200batch


 76%|█████████████████████████████████████████████████████████████▌                   | 76/100 [10:07<03:06,  7.76s/it]

____________________________________________________
cost: 1796.98 accuracy: 0.387763636364 before epoch 76
cost: 9.02293	accuracy: 0.42578125 in 76 epoch in 0batch
cost: 12.2481	accuracy: 0.40234375 in 76 epoch in 40batch
cost: 10.406	accuracy: 0.359375 in 76 epoch in 80batch
cost: 11.8281	accuracy: 0.3984375 in 76 epoch in 120batch
cost: 12.6833	accuracy: 0.375 in 76 epoch in 160batch
cost: 16.4811	accuracy: 0.390625 in 76 epoch in 200batch


 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [10:15<02:57,  7.73s/it]

____________________________________________________
cost: 1791.8 accuracy: 0.387218181818 before epoch 77
cost: 8.56154	accuracy: 0.40234375 in 77 epoch in 0batch
cost: 11.6635	accuracy: 0.40625 in 77 epoch in 40batch
cost: 17.4373	accuracy: 0.37109375 in 77 epoch in 80batch
cost: 14.715	accuracy: 0.421875 in 77 epoch in 120batch
cost: 13.2927	accuracy: 0.4453125 in 77 epoch in 160batch
cost: 23.9847	accuracy: 0.40234375 in 77 epoch in 200batch


 78%|███████████████████████████████████████████████████████████████▏                 | 78/100 [10:23<02:48,  7.67s/it]

____________________________________________________
cost: 1916.77 accuracy: 0.387472727273 before epoch 78
cost: 8.99537	accuracy: 0.36328125 in 78 epoch in 0batch
cost: 14.631	accuracy: 0.390625 in 78 epoch in 40batch
cost: 15.9234	accuracy: 0.47265625 in 78 epoch in 80batch
cost: 10.4718	accuracy: 0.45703125 in 78 epoch in 120batch
cost: 13.5108	accuracy: 0.37890625 in 78 epoch in 160batch
cost: 18.023	accuracy: 0.37890625 in 78 epoch in 200batch


 79%|███████████████████████████████████████████████████████████████▉                 | 79/100 [10:30<02:40,  7.66s/it]

____________________________________________________
cost: 1748.23 accuracy: 0.387781818182 before epoch 79
cost: 6.89475	accuracy: 0.38671875 in 79 epoch in 0batch
cost: 12.99	accuracy: 0.359375 in 79 epoch in 40batch
cost: 13.125	accuracy: 0.390625 in 79 epoch in 80batch
cost: 16.4125	accuracy: 0.37890625 in 79 epoch in 120batch
cost: 17.4795	accuracy: 0.40234375 in 79 epoch in 160batch
cost: 11.5392	accuracy: 0.38671875 in 79 epoch in 200batch


 80%|████████████████████████████████████████████████████████████████▊                | 80/100 [10:38<02:34,  7.71s/it]

____________________________________________________
cost: 1826.45 accuracy: 0.388872727273 before epoch 80
cost: 8.58515	accuracy: 0.37890625 in 80 epoch in 0batch
cost: 10.4277	accuracy: 0.3828125 in 80 epoch in 40batch
cost: 15.8917	accuracy: 0.41015625 in 80 epoch in 80batch
cost: 13.4478	accuracy: 0.36328125 in 80 epoch in 120batch
cost: 13.0284	accuracy: 0.35546875 in 80 epoch in 160batch
cost: 13.7917	accuracy: 0.44921875 in 80 epoch in 200batch


 81%|█████████████████████████████████████████████████████████████████▌               | 81/100 [10:46<02:25,  7.66s/it]

____________________________________________________
cost: 1766.42 accuracy: 0.389018181818 before epoch 81
cost: 7.3434	accuracy: 0.33984375 in 81 epoch in 0batch
cost: 12.9342	accuracy: 0.4453125 in 81 epoch in 40batch
cost: 9.51169	accuracy: 0.41796875 in 81 epoch in 80batch
cost: 14.0166	accuracy: 0.3671875 in 81 epoch in 120batch
cost: 11.3134	accuracy: 0.3359375 in 81 epoch in 160batch
cost: 15.2807	accuracy: 0.3671875 in 81 epoch in 200batch


 82%|██████████████████████████████████████████████████████████████████▍              | 82/100 [10:53<02:17,  7.63s/it]

____________________________________________________
cost: 1732.57 accuracy: 0.388490909091 before epoch 82
cost: 7.71752	accuracy: 0.4375 in 82 epoch in 0batch
cost: 7.29697	accuracy: 0.33984375 in 82 epoch in 40batch
cost: 10.4872	accuracy: 0.37890625 in 82 epoch in 80batch
cost: 18.7456	accuracy: 0.375 in 82 epoch in 120batch
cost: 16.0335	accuracy: 0.3828125 in 82 epoch in 160batch
cost: 12.2556	accuracy: 0.34375 in 82 epoch in 200batch


 83%|███████████████████████████████████████████████████████████████████▏             | 83/100 [11:01<02:09,  7.64s/it]

____________________________________________________
cost: 1717.35 accuracy: 0.388563636364 before epoch 83
cost: 7.63619	accuracy: 0.41796875 in 83 epoch in 0batch
cost: 11.9512	accuracy: 0.36328125 in 83 epoch in 40batch
cost: 7.53926	accuracy: 0.39453125 in 83 epoch in 80batch
cost: 8.53782	accuracy: 0.36328125 in 83 epoch in 120batch
cost: 11.9243	accuracy: 0.421875 in 83 epoch in 160batch
cost: 12.6045	accuracy: 0.38671875 in 83 epoch in 200batch


 84%|████████████████████████████████████████████████████████████████████             | 84/100 [11:09<02:04,  7.79s/it]

____________________________________________________
cost: 1666.31 accuracy: 0.389272727273 before epoch 84
cost: 7.17345	accuracy: 0.42578125 in 84 epoch in 0batch
cost: 7.30578	accuracy: 0.3671875 in 84 epoch in 40batch
cost: 14.9203	accuracy: 0.421875 in 84 epoch in 80batch
cost: 11.548	accuracy: 0.38671875 in 84 epoch in 120batch
cost: 16.7247	accuracy: 0.3984375 in 84 epoch in 160batch
cost: 7.70368	accuracy: 0.40234375 in 84 epoch in 200batch


 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [11:17<01:57,  7.85s/it]

____________________________________________________
cost: 1607.63 accuracy: 0.389272727273 before epoch 85
cost: 6.63707	accuracy: 0.421875 in 85 epoch in 0batch
cost: 8.56188	accuracy: 0.36328125 in 85 epoch in 40batch
cost: 12.9267	accuracy: 0.41015625 in 85 epoch in 80batch
cost: 17.2726	accuracy: 0.41796875 in 85 epoch in 120batch
cost: 23.7449	accuracy: 0.421875 in 85 epoch in 160batch
cost: 17.6489	accuracy: 0.34375 in 85 epoch in 200batch


 86%|█████████████████████████████████████████████████████████████████████▋           | 86/100 [11:25<01:50,  7.89s/it]

____________________________________________________
cost: 1649.25 accuracy: 0.389327272727 before epoch 86
cost: 6.41446	accuracy: 0.37109375 in 86 epoch in 0batch
cost: 15.4663	accuracy: 0.40625 in 86 epoch in 40batch
cost: 12.7096	accuracy: 0.40234375 in 86 epoch in 80batch
cost: 12.2171	accuracy: 0.38671875 in 86 epoch in 120batch
cost: 12.1868	accuracy: 0.37890625 in 86 epoch in 160batch
cost: 14.3665	accuracy: 0.40234375 in 86 epoch in 200batch


 87%|██████████████████████████████████████████████████████████████████████▍          | 87/100 [11:33<01:41,  7.80s/it]

____________________________________________________
cost: 1517.86 accuracy: 0.3886 before epoch 87
cost: 6.84677	accuracy: 0.44921875 in 87 epoch in 0batch
cost: 9.41356	accuracy: 0.39453125 in 87 epoch in 40batch
cost: 6.12609	accuracy: 0.41015625 in 87 epoch in 80batch
cost: 8.18238	accuracy: 0.4140625 in 87 epoch in 120batch
cost: 6.14241	accuracy: 0.3828125 in 87 epoch in 160batch
cost: 14.7769	accuracy: 0.4140625 in 87 epoch in 200batch


 88%|███████████████████████████████████████████████████████████████████████▎         | 88/100 [11:41<01:34,  7.87s/it]

____________________________________________________
cost: 1597.78 accuracy: 0.389545454545 before epoch 88
cost: 8.10345	accuracy: 0.375 in 88 epoch in 0batch
cost: 9.84224	accuracy: 0.38671875 in 88 epoch in 40batch
cost: 12.3196	accuracy: 0.3828125 in 88 epoch in 80batch
cost: 7.64505	accuracy: 0.453125 in 88 epoch in 120batch
cost: 8.97597	accuracy: 0.38671875 in 88 epoch in 160batch
cost: 10.2512	accuracy: 0.37109375 in 88 epoch in 200batch


 89%|████████████████████████████████████████████████████████████████████████         | 89/100 [11:49<01:28,  8.04s/it]

____________________________________________________
cost: 1663.23 accuracy: 0.3898 before epoch 89
cost: 7.87033	accuracy: 0.3984375 in 89 epoch in 0batch
cost: 16.4264	accuracy: 0.4140625 in 89 epoch in 40batch
cost: 20.2485	accuracy: 0.35546875 in 89 epoch in 80batch
cost: 19.6104	accuracy: 0.44140625 in 89 epoch in 120batch
cost: 10.5259	accuracy: 0.44140625 in 89 epoch in 160batch
cost: 8.38008	accuracy: 0.42578125 in 89 epoch in 200batch


 90%|████████████████████████████████████████████████████████████████████████▉        | 90/100 [11:57<01:19,  7.94s/it]

____________________________________________________
cost: 1557.4 accuracy: 0.388890909091 before epoch 90
cost: 6.92841	accuracy: 0.44921875 in 90 epoch in 0batch
cost: 10.7825	accuracy: 0.3828125 in 90 epoch in 40batch
cost: 16.4284	accuracy: 0.38671875 in 90 epoch in 80batch
cost: 12.4391	accuracy: 0.4140625 in 90 epoch in 120batch
cost: 18.9486	accuracy: 0.37109375 in 90 epoch in 160batch
cost: 12.543	accuracy: 0.4140625 in 90 epoch in 200batch


 91%|█████████████████████████████████████████████████████████████████████████▋       | 91/100 [12:04<01:10,  7.79s/it]

____________________________________________________
cost: 1507.76 accuracy: 0.388836363636 before epoch 91
cost: 6.75859	accuracy: 0.359375 in 91 epoch in 0batch
cost: 8.25303	accuracy: 0.38671875 in 91 epoch in 40batch
cost: 8.94647	accuracy: 0.40625 in 91 epoch in 80batch
cost: 16.9739	accuracy: 0.40234375 in 91 epoch in 120batch
cost: 15.9496	accuracy: 0.421875 in 91 epoch in 160batch
cost: 11.7248	accuracy: 0.3828125 in 91 epoch in 200batch


 92%|██████████████████████████████████████████████████████████████████████████▌      | 92/100 [12:12<01:01,  7.73s/it]

____________________________________________________
cost: 1584.69 accuracy: 0.388345454545 before epoch 92
cost: 7.48638	accuracy: 0.37109375 in 92 epoch in 0batch
cost: 10.5467	accuracy: 0.41015625 in 92 epoch in 40batch
cost: 18.6853	accuracy: 0.34765625 in 92 epoch in 80batch
cost: 10.0652	accuracy: 0.4140625 in 92 epoch in 120batch
cost: 12.0048	accuracy: 0.36328125 in 92 epoch in 160batch
cost: 10.9525	accuracy: 0.38671875 in 92 epoch in 200batch


 93%|███████████████████████████████████████████████████████████████████████████▎     | 93/100 [12:19<00:53,  7.67s/it]

____________________________________________________
cost: 1523.46 accuracy: 0.389218181818 before epoch 93
cost: 6.55698	accuracy: 0.40234375 in 93 epoch in 0batch
cost: 9.81295	accuracy: 0.41796875 in 93 epoch in 40batch
cost: 15.4341	accuracy: 0.37890625 in 93 epoch in 80batch
cost: 23.1952	accuracy: 0.3671875 in 93 epoch in 120batch
cost: 11.595	accuracy: 0.37109375 in 93 epoch in 160batch
cost: 13.301	accuracy: 0.3984375 in 93 epoch in 200batch


 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [12:27<00:45,  7.62s/it]

____________________________________________________
cost: 1524.38 accuracy: 0.389618181818 before epoch 94
cost: 6.63646	accuracy: 0.35546875 in 94 epoch in 0batch
cost: 10.6505	accuracy: 0.390625 in 94 epoch in 40batch
cost: 12.0597	accuracy: 0.421875 in 94 epoch in 80batch
cost: 7.96545	accuracy: 0.3671875 in 94 epoch in 120batch
cost: 9.10999	accuracy: 0.375 in 94 epoch in 160batch
cost: 6.49894	accuracy: 0.39453125 in 94 epoch in 200batch


 95%|████████████████████████████████████████████████████████████████████████████▉    | 95/100 [12:34<00:37,  7.54s/it]

____________________________________________________
cost: 1565.18 accuracy: 0.389454545455 before epoch 95
cost: 7.05444	accuracy: 0.39453125 in 95 epoch in 0batch
cost: 7.99652	accuracy: 0.4296875 in 95 epoch in 40batch
cost: 19.0309	accuracy: 0.375 in 95 epoch in 80batch
cost: 15.0986	accuracy: 0.41796875 in 95 epoch in 120batch
cost: 15.1967	accuracy: 0.390625 in 95 epoch in 160batch
cost: 13.4552	accuracy: 0.375 in 95 epoch in 200batch


 96%|█████████████████████████████████████████████████████████████████████████████▊   | 96/100 [12:42<00:30,  7.52s/it]

____________________________________________________
cost: 1439.17 accuracy: 0.388836363636 before epoch 96
cost: 6.74448	accuracy: 0.38671875 in 96 epoch in 0batch
cost: 5.66466	accuracy: 0.42578125 in 96 epoch in 40batch
cost: 8.92464	accuracy: 0.390625 in 96 epoch in 80batch
cost: 5.33909	accuracy: 0.3984375 in 96 epoch in 120batch
cost: 20.3599	accuracy: 0.4140625 in 96 epoch in 160batch
cost: 10.005	accuracy: 0.39453125 in 96 epoch in 200batch


 97%|██████████████████████████████████████████████████████████████████████████████▌  | 97/100 [12:49<00:22,  7.56s/it]

____________________________________________________
cost: 1362.73 accuracy: 0.389181818182 before epoch 97
cost: 5.95682	accuracy: 0.44921875 in 97 epoch in 0batch
cost: 5.89756	accuracy: 0.390625 in 97 epoch in 40batch
cost: 10.5268	accuracy: 0.44140625 in 97 epoch in 80batch
cost: 10.2939	accuracy: 0.3984375 in 97 epoch in 120batch
cost: 12.7578	accuracy: 0.375 in 97 epoch in 160batch
cost: 9.39843	accuracy: 0.3828125 in 97 epoch in 200batch


 98%|███████████████████████████████████████████████████████████████████████████████▍ | 98/100 [12:57<00:15,  7.51s/it]

____________________________________________________
cost: 1379.79 accuracy: 0.388709090909 before epoch 98
cost: 6.61867	accuracy: 0.4453125 in 98 epoch in 0batch
cost: 5.72042	accuracy: 0.39453125 in 98 epoch in 40batch
cost: 7.61945	accuracy: 0.41015625 in 98 epoch in 80batch
cost: 14.9409	accuracy: 0.38671875 in 98 epoch in 120batch
cost: 20.1027	accuracy: 0.40234375 in 98 epoch in 160batch
cost: 12.4322	accuracy: 0.39453125 in 98 epoch in 200batch


 99%|████████████████████████████████████████████████████████████████████████████████▏| 99/100 [13:04<00:07,  7.53s/it]

____________________________________________________
cost: 1551.64 accuracy: 0.388090909091 before epoch 99
cost: 7.11931	accuracy: 0.390625 in 99 epoch in 0batch
cost: 9.07061	accuracy: 0.33984375 in 99 epoch in 40batch
cost: 14.9246	accuracy: 0.34765625 in 99 epoch in 80batch
cost: 13.3446	accuracy: 0.44140625 in 99 epoch in 120batch
cost: 24.4901	accuracy: 0.4453125 in 99 epoch in 160batch
cost: 16.496	accuracy: 0.41015625 in 99 epoch in 200batch


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [13:12<00:00,  7.55s/it]


In [None]:
def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

print('performing k-means clustering on the extracted features')

n_clusters=10
print(sae.extract_feature.shape)
km = KMeans(n_clusters, n_init=20)
y_pred = km.fit_predict(sae.extract_feature)
y = y_train.reshape(-1)
from sklearn.metrics import normalized_mutual_info_score as nmi
from sklearn.metrics import accuracy_score 
print('K-means clustering result on extracted features: NMI = '+str(nmi(y, y_pred)))
print('K-means clustering result on extracted features: accuracy = '+str(cluster_acc(y, y_pred)))


In [None]:
import time
from sklearn.manifold import TSNE
from ggplot import *

def tsne(x,y,n_sne):  
    time_start = time.time()
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    tsne_results = tsne.fit_transform(x[:n_sne])
    print(x.shape)
    print(y.shape)
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
    x_tsne = np.expand_dims(tsne_results[:,0],1)
    y_tsne = np.expand_dims(tsne_results[:,1],1)
    label = np.int32(y[:n_sne])
    data = np.concatenate((x_tsne,y_tsne,label),axis=1)

    df_tsne = pd.DataFrame(data=data,columns=['x-tsne','y-tsne','label'])
    df_tsne.label  = df_tsne.label.map(lambda x:  str(x))
    chart = ggplot( df_tsne, aes(x='x-tsne', y='y-tsne', color='label') ) \
            + geom_point(size=70,alpha=0.1) \
            + ggtitle("tSNE dimensions colored by digit")
    chart.show()
tsne(X_train,y_train,5000)
tsne(sae.extract_feature,y_train,5000)