In [1]:
""" Deep Auto-Encoder implementation
    
    An auto-encoder works as follows:

    Data of dimension k is reduced to a lower dimension j using a matrix multiplication:
    softmax(W*x + b)  = x'
    
    where W is a matrix mapping R^k --> R^j

    A reconstruction matrix W' maps back from R^j --> R^k

    so our reconstruction function is softmax'(W' * x' + b') 

    Now the point of the auto-encoder is to create a reduction matrix (values for W, b) 
    that is "good" at reconstructing  the original data. 

    Thus we want to minimize  ||softmax'(W' * (softmax(W *x+ b)) + b')  - x||

    A deep auto-encoder is nothing more than stacking successive layers of these reductions.
"""
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import random
import os, sys
import re
from PIL import Image
from scipy.misc import imresize
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.spatial.distance as dist
import scipy.stats as ss

In [2]:
def rgb2gray (rgb):
    """Convert an RGB(A) image array to grayscale luma.

    Only the first three entries of the last axis are used, so an alpha
    channel (if any) is ignored.  The weights are the ITU-R BT.601 luma
    coefficients; they sum to 1, so a pure white pixel keeps its value.

    Parameters
    ----------
    rgb : array-like, shape (..., >=3)
        Image whose last axis holds the R, G, B (and optionally more) channels.

    Returns
    -------
    numpy.ndarray, shape (...)
        Weighted sum of the three colour channels.
    """
    # The blue weight is 0.114 (not 0.144): with 0.144 the weights sum to 1.03
    # and bright pixels overshoot the input range.
    return np.dot (rgb[...,:3], [0.299, 0.587, 0.114])

def imshow_gray (im):
    """Draw ``im`` with matplotlib using the 'gray' colormap.

    Nearest-neighbour interpolation is requested so that individual pixels
    are shown without smoothing.
    """
    gray_cmap = plt.get_cmap ('gray')
    plt.imshow (im, cmap=gray_cmap, interpolation='nearest')

In [3]:
# Every image is resized to min_rows x min_cols, converted to grayscale and
# flattened into one row of length d.
min_rows = 32
min_cols = 32
d = min_rows * min_cols
X = []   # one flattened grayscale image per entry
C = []   # class label of each image = name of the folder that contains it

for base, dirs, files in os.walk ('Images'):
    for filename in files:
        # Only files whose name ends in ".JPEG" (case-sensitive) are loaded.
        if not re.match (r'^(.*)\.JPEG$', filename):
            continue
        filepath = os.path.join (base, filename)
        im0 = imresize(np.asarray (Image.open(filepath, 'r')), (min_rows, min_cols))
        if im0.ndim == 3:
            im = rgb2gray (im0)
        else:
            # Already single-channel.  Without this branch `im` would be
            # undefined for the first file or silently reuse the pixels of
            # the previously loaded image.
            im = im0.astype(float)
        X.append(np.reshape (im, d))
        C.append(os.path.split(base)[1])

print("Found", len (X), "Images.\n")
# Force a float array so the in-place mean subtraction further down works even
# when every image was grayscale (integer pixels).
X = np.asarray(X, dtype=float)

Found 8987 Images.



In [4]:
# Centre every pixel column on zero (in place), then decompose the
# covariance matrix of the centred data.
X -= X.mean(axis=0)
cov = X.T.dot(X) / len(X)
# S holds the singular values of cov and the columns of U the matching
# left singular vectors.
U, S, V = np.linalg.svd(cov)

In [5]:
# Project the centred data onto the singular vectors of the covariance.
Xrot = X.dot(U)
# Same projection restricted to the first 32 components.
Xrot_reduced = X.dot(U[:, :32])
# Rescale each projected component by 1/sqrt(S); the 1e-5 term keeps the
# division finite when a singular value is (close to) zero.
Xwhite = Xrot / np.sqrt(S + 1e-5)

In [6]:
def create(x, layer_sizes):
    """Build a tied-weight deep auto-encoder graph on top of ``x``.

    Each encoding layer computes ``tanh(input @ W + b)``.  The decoder walks
    the same layers in reverse order and reuses the transposed encoder
    matrices (tied weights) with freshly created bias vectors.

    Parameters
    ----------
    x : tensor
        Input whose second dimension (``x.get_shape()[1]``) is the data width.
    layer_sizes : sequence of int
        Output width of each successive encoding layer.  The sequence is not
        modified.

    Returns
    -------
    dict
        ``'encoded'``: output of the last encoding layer,
        ``'decoded'``: reconstruction with the same width as ``x``,
        ``'cost'``: square root of the mean squared difference between ``x``
        and the reconstruction.

    Raises
    ------
    ValueError
        If ``layer_sizes`` is empty.
    """
    # Work on a copy: reversing the argument in place would silently change
    # the list owned by the caller.
    sizes = list(layer_sizes)
    if len(sizes) == 0:
        raise ValueError("layer_sizes must contain at least one layer size")

    data_dim = int(x.get_shape()[1])

    # Build the encoding layers
    next_layer_input = x
    encoding_matrices = []
    for dim in sizes:
        input_dim = int(next_layer_input.get_shape()[1])

        # Initialize W using random values in interval [-1/sqrt(n) , 1/sqrt(n)]
        bound = 1.0 / math.sqrt(input_dim)
        W = tf.Variable(tf.random_uniform([input_dim, dim], -bound, bound))

        # Initialize b to zero
        b = tf.Variable(tf.zeros([dim]))

        # We are going to use tied-weights so store the W matrix for later reference.
        encoding_matrices.append(W)

        # the input into the next layer is the output of this layer
        next_layer_input = tf.nn.tanh(tf.matmul(next_layer_input, W) + b)

    # The fully encoded x value is now stored in the next_layer_input
    encoded_x = next_layer_input

    # Build the reconstruction layers by undoing the reductions in reverse
    # order: each decoder layer outputs the width of the layer that preceded
    # the matching encoder layer, ending with the width of x itself.
    decoder_dims = sizes[::-1][1:] + [data_dim]
    for W_enc, dim in zip(reversed(encoding_matrices), decoder_dims):
        # tied weights: reuse the transposed encoding matrix for this step
        W = tf.transpose(W_enc)
        b = tf.Variable(tf.zeros([dim]))
        next_layer_input = tf.nn.tanh(tf.matmul(next_layer_input, W) + b)

    # the fully encoded and reconstructed value of x is here:
    reconstructed_x = next_layer_input

    return {
        'encoded': encoded_x,
        'decoded': reconstructed_x,
        'cost' : tf.sqrt(tf.reduce_mean(tf.square(x-reconstructed_x)))
    }

In [7]:
def deep_test(hlayers, X):
    """Train an auto-encoder on rows of ``X`` and encode the held-out rows.

    The function depends on three notebook-level globals that must exist
    before it is called: ``trainix`` (row indices used for training),
    ``testix`` (row indices to encode) and ``X_enc`` (pre-allocated array
    that receives one encoding per entry of ``testix``, in the same order).

    Parameters
    ----------
    hlayers : list of int
        Layer sizes handed to ``create``.
    X : numpy.ndarray, shape (n_samples, n_features)
        Data matrix; rows are selected with the indices in ``trainix`` and
        ``testix``.

    Returns
    -------
    None
        The encodings are written into the global ``X_enc``.
    """
    start_dim = X.shape[1]
    x = tf.placeholder("float", [None, start_dim])
    autoencoder = create(x, hlayers)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(autoencoder['cost'])

    # The context manager closes the session (and releases its resources)
    # even if training raises.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        for _ in range(10):
            # Each pass draws X.shape[0] training indices with replacement.
            nnSet = np.random.choice(trainix, X.shape[0])
            for i in nnSet:
                # send one image at a time, perturbed with Gaussian noise
                # (standard deviation 0.05)
                batch = [np.random.normal(X[i], 0.05)]
                sess.run(train_step, feed_dict={x: batch})

        # Encode the held-out rows; row jj of X_enc belongs to testix[jj].
        for jj, i in enumerate(testix):
            batch = [np.random.normal(X[i], 0.001)]
            X_enc[jj] = sess.run(autoencoder['encoded'], feed_dict={x: batch})

In [16]:
# Hold out 20% of the whitened images for testing.  Sampling WITHOUT
# replacement keeps duplicate rows out of the test set; duplicates would
# otherwise show up as near-perfect neighbours in the correlation analysis.
n_samples = Xwhite.shape[0]
testix = np.random.choice(n_samples, int(0.2 * n_samples), replace=False)
# Every index that was not drawn for testing is used for training.
trainix = np.setdiff1d(np.arange(n_samples), testix).tolist()
# One row per test image; 30 columns to hold the 30-dimensional encoding.
X_enc = np.zeros((len(testix), 30))
# Class label of each test image, in the same order as testix / X_enc.
C_test = [C[x] for x in testix]

In [23]:
# Train the two-layer (50 -> 30) auto-encoder on the whitened images.
if __name__ == "__main__":
    deep_test(hlayers=[50, 30], X=Xwhite)

In [24]:
#np.save("X_enc", X_enc)
#np.save("X", X)
#np.save("C", C)
#np.save("corrLUnsorted", corrL)
#np.save("corrLSorted", corrLS)
#X = np.load('X.npy')
#X_enc = np.load("X_enc.npy")
#C = np.load("C.npy")
#corrL = np.load("corrLSorted.npy")

In [25]:
# Pairwise Pearson correlation coefficient between the rows of X_enc,
# expanded from pdist's condensed form to a full square matrix.
corrL = dist.squareform(
    dist.pdist(X_enc, metric=lambda u, v: ss.pearsonr(u, v)[0])
)

In [26]:
# corrLS[i] lists the column indices of row i of corrL ordered by decreasing
# absolute correlation.  A stable sort keeps tied entries in index order.
# The result is a NEW array: assigning `corrLS = corrL` and writing into it
# would overwrite the correlation matrix itself and make this cell
# impossible to re-run.
corrLS = np.argsort(-np.abs(corrL), axis=1, kind='stable')

In [27]:
# Distinct class labels collected while loading (one label per image, taken
# from the name of the folder that contained it).
set(C)

{'Animal',
 'Fungus',
 'Geological Formation',
 'Person',
 'Plant, flora, plant life',
 'Sport'}

In [28]:
from collections import Counter

# Confusion counts: rows are indexed by the PREDICTED class, columns by the
# TRUE class of the test image; the extra 'Accuracy' column starts at 0.
class_names = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus', 'Sport']
confuMat = pd.DataFrame(data=0.0, index=class_names, columns=class_names + ['Accuracy'])

for i in range(corrLS.shape[0]):
    # Majority vote over the labels of the 5 top-ranked rows for image i.
    neighbours = map(int, corrLS[i,:5])
    predClass = Counter(C_test[x] for x in neighbours).most_common(1)[0][0]
    # Single .loc access: chained indexing (frame[col][row] += 1) is not
    # guaranteed to write back into the frame.
    confuMat.loc[predClass, C_test[i]] += 1

In [29]:
# Per-row accuracy = diagonal count / row total.  Only the class columns are
# summed, so re-running this cell does not fold the previous 'Accuracy' value
# into the total.  The result is written with one column assignment instead
# of chained .iloc[i][...] indexing, which may write to a temporary copy.
class_counts = confuMat[[c for c in confuMat.columns if c != 'Accuracy']]
confuMat['Accuracy'] = np.diag(class_counts.values) / class_counts.sum(axis=1)

In [30]:
# Display the confusion matrix together with its 'Accuracy' column.
confuMat

Unnamed: 0,Animal,Geological Formation,Person,"Plant, flora, plant life",Fungus,Sport,Accuracy
Animal,51.0,43.0,32.0,33.0,25.0,42.0,0.225664
Geological Formation,89.0,125.0,53.0,68.0,46.0,96.0,0.262055
Person,41.0,38.0,30.0,31.0,25.0,48.0,0.140845
"Plant, flora, plant life",42.0,48.0,48.0,62.0,31.0,48.0,0.222222
Fungus,25.0,29.0,29.0,23.0,35.0,38.0,0.195531
Sport,71.0,66.0,63.0,52.0,48.0,123.0,0.29078


In [107]:
# Hold out 20% of the (centred, un-whitened) images for testing.  Sampling
# WITHOUT replacement keeps duplicate rows out of the test set.
n_samples = X.shape[0]
testix = np.random.choice(n_samples, int(0.2 * n_samples), replace=False)
# Every index that was not drawn for testing is used for training.
trainix = np.setdiff1d(np.arange(n_samples), testix).tolist()
# One row per test image; 30 columns to hold the 30-dimensional encoding.
X_enc = np.zeros((len(testix), 30))
# Class label of each test image, in the same order as testix / X_enc.
C_test = [C[x] for x in testix]

In [None]:
# deep_test writes exactly one encoding per entry of testix, so X_enc needs
# len(testix) rows.  Allocating X.shape[0] rows would leave all-zero rows
# behind, and those have no defined Pearson correlation.
X_enc = np.zeros((len(testix), 30))
if __name__ == '__main__':
    # Single hidden layer of width 30, trained on the un-whitened data.
    deep_test([30], X)

In [None]:
#X_encSm = X_enc
#np.save("X_encSm", X_encSm)
#X_encSm = np.load('X_encSm.npy')

In [None]:
import scipy.spatial.distance as dist
import scipy.stats as ss
# Pairwise Pearson correlation coefficient between the rows of X_enc,
# expanded from pdist's condensed form to a full square matrix.
corrLSm = dist.squareform(
    dist.pdist(X_enc, metric=lambda u, v: ss.pearsonr(u, v)[0])
)

In [None]:
#np.save('corrLSm', corrLSm)

In [None]:
# corrLSmS[i] lists the column indices of row i of corrLSm ordered by
# decreasing absolute correlation (stable sort: ties stay in index order).
# The result is a NEW array: assigning `corrLSmS = corrLSm` and writing into
# it would overwrite the correlation matrix itself.
corrLSmS = np.argsort(-np.abs(corrLSm), axis=1, kind='stable')

In [None]:
#np.save('corrLSmS', corrLSmS)

In [None]:
from collections import Counter

# Confusion counts: rows are indexed by the PREDICTED class, columns by the
# TRUE class of the test image; the extra 'Accuracy' column starts at 0.
class_names = ['Animal','Geological Formation','Person','Plant, flora, plant life','Fungus', 'Sport']
confuMatSm = pd.DataFrame(data=0.0, index=class_names, columns=class_names + ['Accuracy'])

# Row jj of X_enc (and therefore of corrLSmS) is the encoding of image
# testix[jj], so labels must come from C_test; indexing the full label list
# C with these row numbers would attach the wrong class to each row.
if corrLSmS.shape[0] != len(C_test):
    raise ValueError("corrLSmS has %d rows but there are %d test labels; "
                     "X_enc must hold exactly one row per entry of testix"
                     % (corrLSmS.shape[0], len(C_test)))

for i in range(corrLSmS.shape[0]):
    # Majority vote over the labels of the 10 top-ranked rows for image i.
    neighbours = map(int, corrLSmS[i,:10])
    predClass = Counter(C_test[x] for x in neighbours).most_common(1)[0][0]
    # Single .loc access instead of chained indexing, which is not
    # guaranteed to write back into the frame.
    confuMatSm.loc[predClass, C_test[i]] += 1

# Per-row accuracy = diagonal count / row total, computed over the class
# columns only so that re-running the cell gives the same numbers.
class_counts = confuMatSm[class_names]
confuMatSm['Accuracy'] = np.diag(class_counts.values) / class_counts.sum(axis=1)

In [None]:
# Display the confusion matrix of the single-layer run.
confuMatSm