In [13]:
""" Deep Auto-Encoder implementation
    
    An auto-encoder works as follows:

    Data of dimension k is reduced to a lower dimension j using a matrix multiplication:
    softmax(W*x + b)  = x'
    
    where W is matrix from R^k --> R^j

    A reconstruction matrix W' maps back from R^j --> R^k

    so our reconstruction function is softmax'(W' * x' + b') 

    Now the point of the auto-encoder is to create a reduction matrix (values for W, b) 
    that is "good" at reconstructing  the original data. 

    Thus we want to minimize  ||softmax'(W' * (softmax(W *x+ b)) + b')  - x||

    A deep auto-encoder is nothing more than stacking successive layers of these reductions.
"""
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import random
import os
import re
from PIL import Image
from scipy.misc import imresize
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:
def rgb2gray (rgb):
    return np.dot (rgb[...,:3], [0.299, 0.587, 0.144])

def imshow_gray (im):
    plt.imshow (im, interpolation='nearest', cmap=plt.get_cmap ('gray'))

In [6]:
min_rows = 32
min_cols = 32
d = min_rows * min_cols
X = []
C = []

for base, dirs, files in os.walk ('Images'):
    for filename in files:
        name_jpeg = re.match (r'^(.*)\.JPEG$', filename)
        if name_jpeg:
            filepath = os.path.join (base, filename)
            im0 = imresize(np.asarray (Image.open(filepath, 'r')), (min_rows, min_cols))
            if len(im0.shape) == 3:
                im = rgb2gray (im0)
            X.extend(np.reshape (im, (1, d)))
            C.append(os.path.split(base)[1])
        
print "Found", len (X), "Images.\n"
X = np.asarray(X)

Found 8987 Images.



In [7]:
meanX = np.mean(X, axis=0)
stdX = np.std(X, axis=0)

In [8]:
for i in range(X.shape[0]):
    X[i] =(X[i] - meanX)/stdX

In [19]:
def create(x, layer_sizes):
    
    # Build the encoding layers
    next_layer_input = x

    encoding_matrices = []
    for dim in layer_sizes:
        input_dim = int(next_layer_input.get_shape()[1])
        
        # Initialize W using random values in interval [-1/sqrt(n) , 1/sqrt(n)]
        W = tf.Variable(tf.random_uniform([input_dim, dim], -1.0 / math.sqrt(input_dim), 1.0 / math.sqrt(input_dim)))
        
        # Initialize b to zero
        b = tf.Variable(tf.zeros([dim]))
        
        # We are going to use tied-weights so store the W matrix for later reference.
        encoding_matrices.append(W)
        
        output = tf.nn.tanh(tf.matmul(next_layer_input,W) + b)
        
        # the input into the next layer is the output of this layer
        next_layer_input = output
        
    # The fully encoded x value is now stored in the next_layer_input
    encoded_x = next_layer_input
    
    # build the reconstruction layers by reversing the reductions
    layer_sizes.reverse()
    encoding_matrices.reverse()
    
    for i, dim in enumerate(layer_sizes[1:] + [ int(x.get_shape()[1])]) :
        # we are using tied weights, so just lookup the encoding matrix for this step and transpose it
        W = tf.transpose(encoding_matrices[i])
        b = tf.Variable(tf.zeros([dim]))
        output = tf.nn.tanh(tf.matmul(next_layer_input,W) + b)
        next_layer_input = output
        
    # the fully encoded and reconstructed value of x is here:
    reconstructed_x = next_layer_input
    
    return {
        'encoded': encoded_x,
        'decoded': reconstructed_x,
        'cost' : tf.sqrt(tf.reduce_mean(tf.square(x-reconstructed_x)))
    }

In [10]:
X_enc = np.zeros((X.shape[0], 30))
def deep_test(hlayers):
    sess = tf.Session()
    start_dim = X.shape[1]
    x = tf.placeholder("float", [None, start_dim])
    autoencoder = create(x, hlayers)
    init = tf.initialize_all_variables()
    sess.run(init)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(autoencoder['cost'])

    for i in range(X.shape[0]):
        cost_old = 1.
        cost_new = 0.
        # send one image at a time:
        print "Record:", i
        batch = []
        batch.append(np.random.normal(X[i], 0.05))
        j=0
        while abs(cost_old - cost_new) > 0.001:
            cost_old = cost_new
            sess.run(train_step, feed_dict={x: np.array(batch)})
            cost_new = sess.run(autoencoder['cost'], feed_dict={x: batch})
            j = j + 1
            if abs(cost_old - cost_new) <= 0.001:
                #print j, "New:", cost_new, "Old:", cost_old
                X_enc[i] = sess.run(autoencoder['encoded'], feed_dict={x: batch})

# Two hidden layers 1024->50->30

In [14]:
if __name__ == '__main__':
    deep_test([50, 30])

Record: 0
Record: 1
Record: 2
Record: 3
Record: 4
Record: 5
Record: 6
Record: 7
Record: 8
Record: 9
Record: 10
Record: 11
Record: 12
Record: 13
Record: 14
Record: 15
Record: 16
Record: 17
Record: 18
Record: 19
Record: 20
Record: 21
Record: 22
Record: 23
Record: 24
Record: 25
Record: 26
Record: 27
Record: 28
Record: 29
Record: 30
Record: 31
Record: 32
Record: 33
Record: 34
Record: 35
Record: 36
Record: 37
Record: 38
Record: 39
Record: 40
Record: 41
Record: 42
Record: 43
Record: 44
Record: 45
Record: 46
Record: 47
Record: 48
Record: 49
Record: 50
Record: 51
Record: 52
Record: 53
Record: 54
Record: 55
Record: 56
Record: 57
Record: 58
Record: 59
Record: 60
Record: 61
Record: 62
Record: 63
Record: 64
Record: 65
Record: 66
Record: 67
Record: 68
Record: 69
Record: 70
Record: 71
Record: 72
Record: 73
Record: 74
Record: 75
Record: 76
Record: 77
Record: 78
Record: 79
Record: 80
Record: 81
Record: 82
Record: 83
Record: 84
Record: 85
Record: 86
Record: 87
Record: 88
Record: 89
Record: 90
Record: 9

In [5]:
#np.save("X_enc", X_enc)
#np.save("X", X)
#np.save("C", C)
#np.save("corrLUnsorted", corrL)
#np.save("corrLSorted", corrLS)
#X = np.load('X.npy')
#X_enc = np.load("X_enc.npy")
#C = np.load("C.npy")
#corrL = np.load("corrLSorted.npy")

In [5]:
import scipy.spatial.distance as dist
import scipy.stats as ss

# Pearson's correlation coefficients
corrL = dist.squareform(dist.pdist(X_enc, lambda x, y: ss.pearsonr(x, y)[0]))

In [11]:
corrLS = corrL
for i in range(corrL.shape[0]):
    corrLS[i] = sorted(range(len(corrL[i])), key=lambda j: abs(corrL[i,j]), reverse=True)

In [6]:
set(C)

{'Animal',
 'Fungus',
 'Geological Formation',
 'Person',
 'Plant, flora, plant life',
 'Sport'}

In [15]:
confuMat = pd.DataFrame(data=0.0, 
index = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus', 'Sport'], 
columns = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus','Sport', 'Accuracy'])

from collections import Counter
for i in range(corrLS.shape[0]):
    predClass = Counter(C[map(int, corrLS[i,:5])]).most_common(1)[0][0]
    confuMat[C[i]][predClass] +=1

for i in range(confuMat.shape[0]):
    confuMat.iloc[i]['Accuracy'] = confuMat.iloc[i][i]/np.sum(confuMat.iloc[i])

In [16]:
confuMat

Unnamed: 0,Animal,Geological Formation,Person,"Plant, flora, plant life",Fungus,Sport,Accuracy
Animal,1299.0,33.0,18.0,29.0,110.0,42.0,0.848465
Geological Formation,55.0,1374.0,125.0,71.0,116.0,56.0,0.764608
Person,22.0,155.0,899.0,170.0,49.0,101.0,0.643983
"Plant, flora, plant life",38.0,73.0,80.0,747.0,26.0,124.0,0.686581
Fungus,109.0,113.0,57.0,51.0,860.0,70.0,0.68254
Sport,48.0,60.0,63.0,203.0,46.0,1495.0,0.780679


# Training with one hidden layer 1024->30

In [20]:
X_enc = np.zeros((X.shape[0], 30))
if __name__ == '__main__':
    deep_test([30])

Record: 0
Record: 1
Record: 2
Record: 3
Record: 4
Record: 5
Record: 6
Record: 7
Record: 8
Record: 9
Record: 10
Record: 11
Record: 12
Record: 13
Record: 14
Record: 15
Record: 16
Record: 17
Record: 18
Record: 19
Record: 20
Record: 21
Record: 22
Record: 23
Record: 24
Record: 25
Record: 26
Record: 27
Record: 28
Record: 29
Record: 30
Record: 31
Record: 32
Record: 33
Record: 34
Record: 35
Record: 36
Record: 37
Record: 38
Record: 39
Record: 40
Record: 41
Record: 42
Record: 43
Record: 44
Record: 45
Record: 46
Record: 47
Record: 48
Record: 49
Record: 50
Record: 51
Record: 52
Record: 53
Record: 54
Record: 55
Record: 56
Record: 57
Record: 58
Record: 59
Record: 60
Record: 61
Record: 62
Record: 63
Record: 64
Record: 65
Record: 66
Record: 67
Record: 68
Record: 69
Record: 70
Record: 71
Record: 72
Record: 73
Record: 74
Record: 75
Record: 76
Record: 77
Record: 78
Record: 79
Record: 80
Record: 81
Record: 82
Record: 83
Record: 84
Record: 85
Record: 86
Record: 87
Record: 88
Record: 89
Record: 90
Record: 9

In [4]:
#X_encSm = X_enc
#np.save("X_encSm", X_encSm)
#X_encSm = np.load('X_encSm.npy')

In [36]:
import scipy.spatial.distance as dist
import scipy.stats as ss
# Pearson's correlation coefficients
corrLSm = dist.squareform(dist.pdist(X_encSm, lambda x, y: ss.pearsonr(x, y)[0]))

In [38]:
#np.save('corrLSm', corrLSm)

In [40]:
corrLSmS = corrLSm
for i in range(corrLSm.shape[0]):
    corrLSmS[i] = sorted(range(len(corrLSm[i])), key=lambda j: abs(corrLSm[i,j]), reverse=True)

In [41]:
#np.save('corrLSmS', corrLSmS)

In [44]:
confuMatSm = pd.DataFrame(data=0.0, 
index = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus', 'Sport'], 
columns = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus','Sport', 'Accuracy'])

from collections import Counter
for i in range(corrLSmS.shape[0]):
    predClass = Counter(C[map(int, corrLSmS[i,:5])]).most_common(1)[0][0]
    confuMatSm[C[i]][predClass] +=1

for i in range(confuMatSm.shape[0]):
    confuMatSm.iloc[i]['Accuracy'] = confuMatSm.iloc[i][i]/np.sum(confuMatSm.iloc[i])

In [45]:
confuMatSm

Unnamed: 0,Animal,Geological Formation,Person,"Plant, flora, plant life",Fungus,Sport,Accuracy
Animal,1099.0,79.0,13.0,32.0,268.0,25.0,0.724934
Geological Formation,90.0,1248.0,190.0,124.0,174.0,44.0,0.66738
Person,34.0,195.0,758.0,363.0,47.0,98.0,0.507023
"Plant, flora, plant life",29.0,67.0,168.0,536.0,30.0,100.0,0.576344
Fungus,274.0,177.0,47.0,50.0,668.0,32.0,0.535256
Sport,45.0,42.0,66.0,166.0,20.0,1589.0,0.82417
