In [1]:
import tensorflow as tf
import numpy as np
from util import ImShow as I
import matplotlib.pyplot as plt
import time
from collections import Counter
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score as recall
from sklearn.metrics import precision_score as precision

In [2]:
def batches(l, n):
    """Yield index batches that cover ``range(l)`` in chunks of at most ``n``.

    Every item is a pair ``(ordered, sampled)``:
      * ``ordered`` -- the consecutive indexes ``i .. min(l, i + n) - 1``; the
        last batch holds whatever indexes are left over, so it may be shorter.
      * ``sampled`` -- an array of the same length as ``ordered``, drawn from
        ``range(l)`` with replacement by ``np.random.choice``.

    l: total number of samples
    n: batch size, must be at least 1

    Raises ValueError (on first iteration) when ``n`` is smaller than 1.
    """
    ## A zero step would make range() fail and a negative one would silently
    ## produce no batches at all, so reject both up front.
    if n < 1:
        raise ValueError("batch size n must be at least 1, got %r" % (n,))
    ## range() instead of the Python-2-only xrange() keeps this usable on both
    ## Python 2 and Python 3.
    for i in range(0, l, n):
        yield range(i, min(l, i + n)), np.random.choice(l, size=min(n, l - i))

class Fluent_Autoencoder(object):
    """Stacked sigmoid autoencoder with tied weights (TensorFlow 1.x graph API).

    The decoder multiplies by the transposed encoder weight matrices, so only
    the biases differ between the encoding and the decoding half. The training
    target is fed through its own placeholder (``input_y``) rather than being
    the input itself; see ``fit`` for what is fed there.
    """
    def __init__(self, sess, input_dim_list=[784,400],learning_rate=0.15):
        """Build the graph and initialise its variables in ``sess``.

        sess: session object used to run the variable initializer
        input_dim_list: layer widths; must include the original data dimension
            as its first entry and have at least two entries
        learning_rate: learning rate handed to the Adam optimizer
        """
        assert len(input_dim_list) >= 2
        self.W_list = []
        self.encoding_b_list = []
        self.decoding_b_list = []
        ## Copy, so neither the shared default list nor a caller's list is aliased.
        self.dim_list = list(input_dim_list)
        ## Encoders parameters: weights uniform in +/- sqrt(6 / (fan_in + fan_out)),
        ## biases uniform in [-0.1, 0.1].
        for i in range(len(self.dim_list)-1):
            init_max_value = np.sqrt(6. / (self.dim_list[i] + self.dim_list[i+1]))
            self.W_list.append(tf.Variable(tf.random_uniform([self.dim_list[i],self.dim_list[i+1]],
                                                             np.negative(init_max_value),init_max_value)))
            self.encoding_b_list.append(tf.Variable(tf.random_uniform([self.dim_list[i+1]],-0.1,0.1)))
        ## Decoders parameters: one bias per layer, walking from the deepest
        ## layer back to the input width (the weights are shared with the encoder).
        for i in range(len(self.dim_list)-2,-1,-1):
            self.decoding_b_list.append(tf.Variable(tf.random_uniform([self.dim_list[i]],-0.1,0.1)))
        ## Placeholders: input_x is what gets encoded, input_y is the
        ## reconstruction target used by the cost.
        self.input_x = tf.placeholder(tf.float32,[None,self.dim_list[0]])
        self.input_y = tf.placeholder(tf.float32,[None,self.dim_list[0]])
        ## coding graph :
        last_layer = self.input_x
        for weight,bias in zip(self.W_list,self.encoding_b_list):
            last_layer = tf.sigmoid(tf.matmul(last_layer,weight) + bias)
        self.hidden = last_layer
        ## decode graph: reuse the encoder weights, transposed, in reverse order
        for weight,bias in zip(reversed(self.W_list),self.decoding_b_list):
            last_layer = tf.sigmoid(tf.matmul(last_layer,tf.transpose(weight)) + bias)
        self.recon = last_layer
        ## Mean squared difference between the reconstruction and input_y.
        self.cost = tf.reduce_mean(tf.square(self.recon - self.input_y))
        #self.cost = tf.losses.log_loss(self.recon, self.input_x)
        self.train_step = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        sess.run(tf.global_variables_initializer())

    def fit(self, X, sess, iteration=200, batch_size=50, init=False,verbose=False):
        """Run ``iteration`` passes of mini-batch training over ``X``.

        For every batch produced by ``batches`` the consecutive rows are fed as
        ``input_x`` and an equally sized set of rows sampled with replacement
        from ``X`` is fed as ``input_y`` (the target of the cost).

        X: 2-D array whose second dimension equals the first layer width
        sess: session used to run the training step
        iteration: number of passes over X
        batch_size: number of rows per batch
        init: when True, re-run the global variable initializer first
        verbose: when True, print the cost of the last batch every 20 passes

        Returns None.
        """
        assert X.shape[1] == self.dim_list[0]
        if init:
            sess.run(tf.global_variables_initializer())

        sample_size = X.shape[0]
        ## range() rather than the Python-2-only xrange().
        for i in range(iteration):
            last_feed = None
            for one_batch,random_batch in batches(sample_size, batch_size):
                last_feed = {self.input_x:X[one_batch],self.input_y:X[random_batch]}
                sess.run(self.train_step,feed_dict = last_feed)
            ## last_feed stays None when X has no rows; skip reporting then
            ## instead of failing on undefined batch variables.
            if verbose and i%20==0 and last_feed is not None:
                e = self.cost.eval(session = sess,feed_dict = last_feed)
                ## Single-argument print() emits the same text on Python 2 and 3.
                print("    iteration :  %s , cost :  %s" % (i, e))

    def transform(self, X, sess):
        """Return the deepest hidden-layer activations for the rows of ``X``."""
        return self.hidden.eval(session = sess, feed_dict={self.input_x: X})

    def getRecon(self, X, sess):
        """Return the decoder output (reconstruction) for the rows of ``X``."""
        return self.recon.eval(session = sess,feed_dict={self.input_x: X})

In [None]:
# x = np.load(r"./data/4_other_x.npk")[:500]
# start_time = time.time()
# with tf.Graph().as_default():
#     with tf.Session() as sess:
#         ae = Fluent_Autoencoder(sess = sess, input_dim_list=[784,500,400],learning_rate=0.0001)
#         error = ae.fit(x ,sess = sess, batch_size = 500, iteration = 3000, verbose=True)
#         R = ae.getRecon(x, sess = sess)

#         error = ae.fit(x ,sess = sess,  batch_size = 500, iteration = 3000, verbose=True)
#         R1 = ae.getRecon(x, sess = sess)
#         print "Runing time:" + str(time.time() - start_time) + " s"

In [None]:
# Xpic = I.tile_raster_images(X = x, img_shape=(28,28), tile_shape=(10,10))
# plt.imshow(Xpic,cmap='gray')
# plt.show()
# Rpic = I.tile_raster_images(X = R, img_shape=(28,28), tile_shape=(10,10))
# plt.imshow(Rpic,cmap='gray')
# plt.show()
# R1pic = I.tile_raster_images(X = R1, img_shape=(28,28), tile_shape=(10,10))
# plt.imshow(R1pic,cmap='gray')
# plt.show()

In [3]:
from shrink import l21shrink as SHR 

class RobustL21Autoencoder(object):
    """
    @author: Chong Zhou
    first version.
    complete: 10/20/2016
    Des:
        X = L + S
        L is a non-linearly low dimension matrix and S is a sparse matrix.
        argmin ||L - Decoder(Encoder(L))|| + ||S||_2,1
        Use Alternating projection to train model
        The idea of shrink the l21 norm comes from the wiki 'Regularization' link: {
            https://en.wikipedia.org/wiki/Regularization_(mathematics)
        }
    Improve:
        1. fix the 0-cost bugs
    """
    def __init__(self, sess, layers_sizes, lambda_=1.0, learning_rate = 0.01):
        """
        sess: a Tensorflow tf.Session object
        layers_sizes: a list that contain the deep ae layer sizes, including the input layer
        lambda_: value handed to the l21 shrink operator as its threshold when S is updated
        learning_rate: learning rate forwarded to the inner Fluent_Autoencoder
        """
        self.lambda_ = lambda_
        self.layers_sizes = layers_sizes
        self.errors=[]
        #self.AE = DAE.Deep_Autoencoder( sess = sess, input_dim_list = self.layers_sizes)
        self.AE = Fluent_Autoencoder( sess = sess,
                                     input_dim_list = self.layers_sizes,
                                    learning_rate = learning_rate)


    def fit(self, X, sess, learning_rate=0.15, inner_iteration = 50,
            iteration=20, batch_size=133, re_init = False,verbose=False):
        """
        Alternate between training the autoencoder on L = X - S and shrinking
        the residual X - L into S.

        X: 2-D array; its second dimension must equal layers_sizes[0]
        sess: session handed to the inner autoencoder
        learning_rate: accepted for interface compatibility but not used here;
            the optimizer's rate is fixed when the object is constructed
        inner_iteration: training passes of the autoencoder per outer iteration
        iteration: number of outer (alternating) iterations
        batch_size: batch size for the inner autoencoder
        re_init: forwarded as ``init`` on every outer iteration, i.e. when True
            the variables are re-initialised before each inner training run
        verbose: print shapes and progress

        Returns the pair (L, S); both are also stored on the instance.
        """
        ## The first layer must be the input layer, so they should have same sizes.
        assert X.shape[1] == self.layers_sizes[0]
        ## initialize L, S
        self.L = np.zeros(X.shape)
        self.S = np.zeros(X.shape)
        ##LS0 = self.L + self.S
        ## To estimate the size of input X
        ## (single-argument print() emits the same text on Python 2 and 3)
        if verbose:
            print("X shape:  %s" % (X.shape,))
            print("L shape:  %s" % (self.L.shape,))
            print("S shape:  %s" % (self.S.shape,))

        ## range() rather than the Python-2-only xrange().
        for it in range(iteration):
            if verbose:
                print("Out iteration:  %s" % (it,))
            ## alternating project, first project to L
            self.L = X - self.S
            ## Using L to train the auto-encoder
            self.AE.fit(self.L, sess = sess,
                        iteration = inner_iteration,
                        batch_size = batch_size,
                        init = re_init,
                        verbose = verbose)
            ## get optmized L
            self.L = self.AE.getRecon(X = self.L, sess = sess)
            ## alternating project, now project to S and shrink S
            ## NOTE(review): the residual is transposed before and after the
            ## call -- presumably l21shrink works column-wise; confirm in shrink.
            self.S = SHR.l21shrink(self.lambda_, (X - self.L).T).T
        return self.L , self.S

    def transform(self, X, sess):
        """Encode X after removing the learned sparse part (uses self.S from fit)."""
        L = X - self.S
        return self.AE.transform(X = L, sess = sess)

    def getRecon(self, X, sess):
        """Return the autoencoder's reconstruction of the stored self.L.

        NOTE(review): the X argument is not used; the result depends only on
        the L kept from the last fit() call. Unlike transform(), X - self.S is
        not formed here -- confirm whether that is intended before changing it.
        """
        return self.AE.getRecon(self.L, sess = sess)


In [7]:

def one_run(lam, data=None):
    """Train one RobustL21Autoencoder in a fresh graph/session and return (L, S).

    lam: value passed as ``lambda_`` to the robust autoencoder
    data: 2-D training matrix; when omitted, the notebook-level variable ``x``
        is used, which must then already be defined

    The network has three layers: the input width, half of it and a quarter of
    it. Training uses 10 outer iterations of 50 inner iterations each, with the
    variables re-initialised on every outer iteration.
    """
    if data is None:
        ## Backward-compatible fallback: the original relied on the global x.
        data = x
    n_features = data.shape[1]
    layers = [n_features, int(n_features*0.5), int(n_features*0.25)]
    ## A fresh graph per run keeps variables of earlier runs out of this one.
    with tf.Graph().as_default():
        with tf.Session() as sess:
            rae = RobustL21Autoencoder(sess = sess, lambda_= lam,
                                       layers_sizes = layers,
                                       learning_rate = 0.01)
            L, S = rae.fit(data, sess = sess,
                           inner_iteration = 50,
                           iteration = 10,
                           re_init = True,
                           verbose = False)
    return L,S

In [12]:
## Number of leading rows kept from the stored arrays.
size = 1000
x = np.load(r"./data/4_other_x.npk")[:size]
y = np.load(r"./data/4_other_y.npk")[:size]
## Label 1 is counted as the majority class; every other label is an outlier.
stat = Counter(y)
## Single-argument print() emits the same text on Python 2 and Python 3.
print(stat)
print("number of majority: %s" % (stat[1],))
print("number of outlier: %s" % (len(y) - stat[1],))
print("outlier ratio: %s" % ((len(y) - stat[1])/ float(len(y)),))
def binary_error(value):
    """Map a number to a class tag: "m" (majority) when it is exactly zero,
    "o" (outlier) for anything else.
    """
    return "m" if value == 0.0 else "o"
    
def binary_y(value):
    """Map a label to a class tag: "m" (majority) for label 1, "o" (outlier)
    for every other label.
    """
    return "m" if value == 1 else "o"

## Materialise the tags as a list: bi_y is reused for every metric call later
## on, and a bare map() is a one-shot iterator on Python 3.
bi_y = list(map(binary_y,y))
print(Counter(bi_y))

Counter({1: 939, 0: 61})
number of majority: 939
number of outlier: 61
outlier ratio: 0.061
Counter({'m': 939, 'o': 61})


In [None]:
## Sweep lambda and record precision / recall / F1 of the outlier class ("o").
precisions=[]
lams=[]
recalls=[]
f1s = []
lam_list = np.arange(6.0,10.0,0.3)
print(lam_list)
for lam in lam_list:
    L,S = one_run(lam=lam)
    ## A row is predicted as an outlier when its sparse part has a non-zero norm.
    ## list(...) because predictions is consumed several times below and a bare
    ## map() is a one-shot iterator on Python 3.
    predictions = list(map(binary_error,np.linalg.norm(S,axis = 1)))
    p = precision(bi_y,predictions,labels=["o","m"],pos_label="o")
    r = recall(bi_y,predictions,labels=["o","m"],pos_label="o")
    f1 = f1_score(bi_y,predictions,labels=["o","m"],pos_label="o")
    ## Single-argument print() emits the same text on Python 2 and Python 3.
    print("lambda: %s" % (lam,))
    print("stat: %s" % (Counter(predictions),))
    print("precision %s" % (p,))
    print("recall %s" % (r,))
    print("f1 %s" % (f1,))
    lams.append(lam)
    precisions.append(p)
    recalls.append(r)
    f1s.append(f1)
#     print(CM(bi_y,predictions))
    print("------------")
print("%s %s %s %s" % (len(lams),len(recalls),len(f1s),len(precisions)))

[6.  6.3 6.6 6.9 7.2 7.5 7.8 8.1 8.4 8.7 9.  9.3 9.6 9.9]
lambda: 6.0
stat: Counter({'o': 625, 'm': 375})
precision 0.0944
recall 0.9672131147540983
f1 0.17201166180758018
------------
lambda: 6.3
stat: Counter({'m': 503, 'o': 497})
precision 0.11871227364185111
recall 0.9672131147540983
f1 0.2114695340501792
------------
lambda: 6.6
stat: Counter({'m': 620, 'o': 380})
precision 0.1368421052631579
recall 0.8524590163934426
f1 0.23582766439909297
------------
lambda: 6.8999999999999995
stat: Counter({'m': 735, 'o': 265})
precision 0.18490566037735848
recall 0.8032786885245902
f1 0.3006134969325153
------------
lambda: 7.199999999999999
stat: Counter({'m': 783, 'o': 217})
precision 0.17972350230414746
recall 0.639344262295082
f1 0.28057553956834536
------------
lambda: 7.499999999999999
stat: Counter({'m': 861, 'o': 139})
precision 0.2446043165467626
recall 0.5573770491803278
f1 0.33999999999999997
------------
