## Load COIL-20 data
## Extract DSIFT features

In [1]:
import matlab.engine
# Launch a MATLAB session driven from Python; `eng` is reused by the two
# "Matlab SSC" cells further down to call SSC_modified.
eng = matlab.engine.start_matlab()
# Point MATLAB's working directory at the bundled SSC-ADMM code. Being the last
# expression of the cell, the value returned by `cd` is echoed as the cell output
# (presumably the previous working directory -- confirm against MATLAB's `cd` docs).
eng.cd("./SSC_ADMM_v1.1")

'C:\\Users\\aleks_000\\Desktop\\Mentorship\\!GitHub'

In [2]:
from load import load_Coil20
from img2matrix import single_img2dsift

# Load the dataset (images plus their labels), then run `single_img2dsift` on every
# image. The result stays a plain Python list, in the same order as `images_raw`.
images_raw, labels = load_Coil20()
images_dsift = list(map(single_img2dsift, images_raw))


Loading Coil20...
----------------
Elapsed: 3.14 sec


In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 2)

# Left panel: first raw image; right panel: its DSIFT counterpart.
# Both are viewed as 2-D arrays that are 128 columns wide.
for panel, collection in zip(ax, (images_raw, images_dsift)):
    panel.imshow(collection[0].reshape((-1, 128)))

In [None]:
import numpy as np

fig, ax = plt.subplots(1, 2)

# Element-wise mean across all samples: raw images (left) vs. DSIFT features (right).
mean_raw = np.mean(images_raw, axis=0)
ax[0].imshow(mean_raw.reshape((-1, 128)))
mean_dsift = np.mean(images_dsift, axis=0)
ax[1].imshow(mean_dsift.reshape((-1, 128)));

In [None]:
fig, ax = plt.subplots(1, 2)

# Element-wise standard deviation across all samples: raw (left) vs. DSIFT (right).
for panel, collection in zip(ax, (images_raw, images_dsift)):
    spread = np.std(collection, axis=0)
    panel.imshow(spread.reshape((-1, 128)))

In [6]:
from visualize import tSNE_2D, tSNE_3D

In [None]:
%%time
tSNE_2D(images_raw, labels)
tSNE_3D(images_raw, labels)

In [None]:
%%time
tSNE_2D(images_dsift, labels)
tSNE_3D(images_dsift, labels)

## Perform PCA

In [7]:
from sklearn.decomposition import PCA
import numpy as np
    
# Reduce the DSIFT features to 300 principal components (ARPACK solver, fixed seed).
pca_settings = dict(
    n_components=300,
    whiten=False,
    svd_solver='arpack',
    random_state=0,
)
pca = PCA(**pca_settings)
# Alternative kept for reference: PCA on a weighted concatenation of raw and DSIFT features.
#images_pca = pca.fit_transform(np.concatenate((1*images_raw, 10*images_dsift), axis=1))
images_pca = pca.fit_transform(images_dsift)

# Last expression of the cell: echo the shape of the projected data as a sanity check.
images_pca.shape

(1440, 300)

In [8]:
def display_image(image, height, width):
    """Print the value range of ``image`` and draw it as a ``height`` x ``width`` picture.

    image  -- array holding exactly ``height * width`` values (it is reshaped, not resampled)
    height -- number of rows of the displayed picture
    width  -- number of columns of the displayed picture
    """
    lowest, highest = np.min(image), np.max(image)
    print(lowest, highest)
    plt.imshow(image.reshape((height, width)))

In [None]:
import matplotlib.pyplot as plt

# these are reduced parameters - not supposed to look like anything
display_image(images_pca[0], 10, 30)

In [None]:
display_image(np.mean(images_pca, axis=0), 10, 30)

In [None]:
display_image(np.std(images_pca, axis=0), 10, 30)

In [None]:
%%time
tSNE_2D(images_pca, labels)
tSNE_3D(images_pca, labels)

## Normalize PCA output

In [12]:
# Types:
# feature-wise - normalization occurs along one pixel of all images
# image-wise - normalization occurs along all pixels of one image
# all - normalization occurs along all pixels of all images

# Methods:
# standard - mean is set to 0, std is set to 1
# [-1, 1] - min and max are used to linearly change the data range to [-1, 1]
# unit-vector - each vector is divided by its Euclidean (L2) norm

#[BAD] feature, standard
#images_norm = (images_pca - np.mean(images_pca, axis=0)) / np.std(images_pca, axis=0)
# image, standard
#images_norm = (images_pca - np.mean(images_pca, axis=1)[:, np.newaxis]) / np.std(images_pca, axis=1)[:, np.newaxis]
# all, standard
#images_norm = (images_pca - np.mean(images_pca)) / np.std(images_pca)


#[BAD] feature, [-1, 1]
#mmin = np.min(images_pca, axis=0)
#mmax = np.max(images_pca, axis=0)
#[BAD] image, [-1, 1]
#mmin = np.min(images_pca, axis=1)[:, np.newaxis]
#mmax = np.max(images_pca, axis=1)[:, np.newaxis]
# all, [-1, 1]
mmin, mmax = np.min(images_pca), np.max(images_pca)
# Shared by every [-1, 1] variant: whichever mmin/mmax pair is active above,
# this maps mmin -> -1 and mmax -> +1 linearly.
value_span = mmax - mmin
images_norm = (2*images_pca - mmax - mmin) / value_span

#[BAD] feature, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca, axis=0))
# image, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca, axis=1))[:, np.newaxis]
# all, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca))

In [None]:
import matplotlib.pyplot as plt

# these are reduced parameters - not supposed to look like anything
display_image(images_norm[0], 10, 30)

In [None]:
display_image(np.mean(images_norm, axis=0), 10, 30)

In [None]:
display_image(np.std(images_norm, axis=0), 10, 30)

In [None]:
%%time
tSNE_2D(images_norm, labels)
tSNE_3D(images_norm, labels)

## Calculate C matrix

In [16]:
import numpy as np
import supporting_files.sda as sda

from supporting_files.helpers import optimize
from scipy.io import savemat, loadmat

In [17]:
# Matlab SSC #1
# Hand the normalized features to MATLAB through a scratch .mat file (variable 'X').
savemat('./temp.mat', mdict={'X': images_norm})
# Number of clusters = number of distinct ground-truth labels.
k = len(np.unique(labels))
alpha = 50.0
maxIter = 25
# NOTE(review): the meaning of the positional flags/values passed here is defined by
# SSC_modified.m, which is not visible in this notebook -- confirm before changing them.
eng.SSC_modified(k, 0, False, alpha, False, 1, 1e-20, maxIter, False)
# Read variable 'C' back from the same scratch file; presumably SSC_modified stores the
# coefficient matrix there (this section is titled "Calculate C matrix").
C = loadmat("./temp.mat")['C']

In [None]:
display_image(C[:64*3, :64*3], 64*3, 64*3)

In [None]:
# Self-expressive reconstruction C.X of the normalized features: show its first row,
# then print the mean squared difference between X and C.X.
self_expressed = np.matmul(C, images_norm)
display_image(self_expressed[0], 10, 30)
print(np.mean(np.square(images_norm - self_expressed)))

## Train Autoencoder

In [40]:
import dsc
import importlib
importlib.reload(dsc)

<module 'dsc' from 'C:\\Users\\aleks_000\\Desktop\\Mentorship\\!GitHub\\dsc.py'>

In [61]:
%%time
# Build the deep subspace clustering model on the normalized PCA features, passing in
# the coefficient matrix C computed in the previous section. The log printed below
# shows layer-wise pretraining (300 -> 200, then 200 -> 150) running during construction.
# NOTE(review): the meaning of the four `weight_init_params` entries is defined in
# dsc.py, which is not visible here -- confirm before tuning them.
d = dsc.DeepSubspaceClustering(images_norm, C=C, hidden_dims=[200, 150, 200], lambda1=0.0001, lambda2=0.001, learning_rate=0.0003,
                               weight_init='sda-normal', weight_init_params=[251, 0.006, images_norm.shape[0], 50],
                               optimizer='Adam', decay='sqrt', sda_optimizer='Adam', sda_decay='sqrt')

Layer 1
300 -> 200
epoch 0: global loss = 0.2754162847995758
epoch 50: global loss = 0.06569532305002213
epoch 100: global loss = 0.05616101995110512
epoch 150: global loss = 0.05111948400735855
epoch 200: global loss = 0.047819774597883224
epoch 250: global loss = 0.04539836570620537
Layer 2
200 -> 150
epoch 0: global loss = 0.16701576113700867
epoch 50: global loss = 0.053260814398527145
epoch 100: global loss = 0.04400462284684181
epoch 150: global loss = 0.038889769464731216
epoch 200: global loss = 0.03532810136675835
epoch 250: global loss = 0.03262083977460861
Wall time: 30.9 s


In [62]:
%%time
#d.optimizer = optimize(d.cost, 0.0003, 'Adam', 'sqrt', d.global_step)
# Full-batch training (batch size = number of samples), logging every 25 epochs.
d.train(batch_size=images_norm.shape[0], epochs=101, print_step=25)
# presumably `result` is the learned latent representation: later cells reshape one row
# to 10x15 = 150 values, matching the 150-unit middle layer -- confirm in dsc.py
images_HM2 = d.result
# presumably `reconstr` is the autoencoder output in input space: the "AE Reconstruction"
# cell reshapes one row to 10x30 = 300 values -- confirm in dsc.py
images_HM = d.reconstr


epoch 0: global loss = 0.0035060348454862833
epoch 25: global loss = 0.002822522073984146
epoch 50: global loss = 0.0027746555861085653
epoch 75: global loss = 0.002745118224993348
epoch 100: global loss = 0.0027205594815313816
Wall time: 34.6 s


In [None]:
display_image(images_HM2[0], 10, 15)

In [None]:
# Same self-expression check on the autoencoder output: show the first row of C.H,
# then print the mean squared difference between H and C.H.
latent_expressed = np.matmul(C, images_HM2)
display_image(latent_expressed[0], 10, 15)
print(np.mean(np.square(images_HM2 - latent_expressed)))

In [None]:
display_image(np.mean(images_HM2, axis=0), 10, 15)

In [None]:
display_image(np.std(images_HM2, axis=0), 10, 15)

In [64]:
%%time
tSNE_2D(images_HM2, labels)
tSNE_3D(images_HM2, labels)

NameError: name 'tSNE_2D' is not defined

In [None]:
# AE Reconstruction
fig, ax = plt.subplots(2)

index = 0
# Top: the normalized input sample; bottom: the autoencoder's reconstruction of it.
for panel, batch in zip(ax, (images_norm, images_HM)):
    panel.imshow(batch[index].reshape((10, 30)))

In [68]:
# Matlab SSC #2
# Same scratch-file hand-off as "Matlab SSC #1", this time on the autoencoder
# output `images_HM2` (written as variable 'X').
savemat('./temp.mat', mdict={'X': images_HM2})
# Number of clusters = number of distinct ground-truth labels.
k = len(np.unique(labels))
alpha = 16.0
maxIter = 25
# Unlike SSC #1, the return value is kept and the last flag is True.
# NOTE(review): the meaning of the positional arguments is defined by SSC_modified.m,
# which is not visible here -- presumably the last flag enables the clustering step.
grps = eng.SSC_modified(k, 0, False, alpha, False, 1, 1e-20, maxIter, True)
# Coefficient matrix read back from the scratch file (variable 'C').
C_after = loadmat("./temp.mat")['C']
# Convert the value returned by MATLAB into a flat int32 NumPy vector of predicted labels.
labels_pred = np.asarray(grps, dtype=np.int32).flatten()

In [69]:
# Python alternative:
from sklearn import cluster

def post_proC(C, K, d, alpha):
    """Turn a self-expression coefficient matrix into cluster labels.

    The matrix is symmetrized, its diagonal is replaced by ones, and a truncated
    SVD of rank ``r = d*K + 1`` yields a row-normalized embedding. The positive
    inner products of that embedding, raised to ``alpha``, form the affinity
    matrix that is handed to spectral clustering.

    Parameters
    ----------
    C : square (n, n) array
        Coefficient matrix.
    K : int
        Number of clusters.
    d : int
        Dimension of each subspace.
    alpha : float
        Exponent applied to the non-negative similarities.

    Returns
    -------
    grp : array of n labels, shifted by +1 relative to scikit-learn's output.
    L : (n, n) symmetric affinity matrix scaled so that its maximum is 1.
    """
    # Imported locally: neither name is imported in the visible part of the notebook,
    # so the original body failed with NameError on its first call.
    from scipy.sparse.linalg import svds
    from sklearn.preprocessing import normalize

    n = C.shape[0]
    C = 0.5*(C + C.T)
    C = C - np.diag(np.diag(C)) + np.eye(n,n) # for sparse C, this step will make the algorithm more numerically stable
    r = d*K + 1
    U, S, _ = svds(C,r,v0 = np.ones(n))
    # Reverse the order in which svds returned the singular triplets.
    U = U[:,::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis = 1)
    Z = U.dot(U.T)
    Z = Z * (Z>0)  # keep only positive similarities
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize')
    # fit_predict already fits the model; the separate fit(L) call that used to
    # precede it only repeated the same work.
    grp = spectral.fit_predict(L) + 1
    return grp, L

def best_map(L1,L2):
    """Relabel the clustering ``L2`` so that it agrees as well as possible with ``L1``.

    A contingency table between the distinct values of ``L1`` and ``L2`` is built
    and a one-to-one matching that maximizes the total overlap is computed. Every
    value of ``L2`` is then replaced by the ``L1`` value it was matched to.

    Parameters
    ----------
    L1 : array-like
        Reference (ground-truth) labels.
    L2 : array-like, same shape as ``L1``
        Cluster assignments to be renamed.

    Returns
    -------
    numpy.ndarray of float64 with the shape of ``L2``, holding the renamed labels.

    Raises
    ------
    IndexError
        If ``L2`` has more distinct values than ``L1`` and one of them ends up
        matched to a padding column that has no ``L1`` label (same as before).
    """
    # Imported locally: the previous implementation relied on a `Munkres` class that
    # is not imported anywhere in the visible notebook (NameError on first call).
    from scipy.optimize import linear_sum_assignment

    L1 = np.asarray(L1)
    L2 = np.asarray(L2)
    Label1 = np.unique(L1)
    nClass1 = len(Label1)
    Label2 = np.unique(L2)
    nClass2 = len(Label2)
    nClass = max(nClass1, nClass2)

    # G[i, j] = number of samples labelled Label1[i] in L1 and Label2[j] in L2.
    # The table is padded with zeros to a square so the matching is always one-to-one.
    G = np.zeros((nClass, nClass))
    for i, ref_label in enumerate(Label1):
        in_ref = L1 == ref_label
        for j, pred_label in enumerate(Label2):
            G[i, j] = np.count_nonzero(in_ref & (L2 == pred_label))

    # Rows of G.T are the L2 clusters, so c[i] is the index of the L1 label matched
    # to Label2[i]. The table is negated because the solver minimizes total cost.
    _, c = linear_sum_assignment(-G.T)

    newL2 = np.zeros(L2.shape)
    for i in range(nClass2):
        newL2[L2 == Label2[i]] = Label1[c[i]]
    return newL2

In [None]:
# post_proC returns a (labels, affinity) pair, so unpack it rather than storing the
# whole tuple under a name that suggests a label vector.
# NOTE(review): d=0 makes r = d*K + 1 = 1 inside post_proC, i.e. a rank-1 embedding --
# confirm the intended per-subspace dimension.
labels_pred2, affinity_pred2 = post_proC(C_after, k, 0, alpha)

In [None]:
%%time
tSNE_2D(images_HM2, labels_pred)
tSNE_3D(images_HM2, labels_pred)

## Evaluate SSC clustering

In [69]:
from supporting_files.ji_zhang import err_rate
from sklearn.metrics import normalized_mutual_info_score as nmi
from sklearn.metrics import adjusted_rand_score as ari

# Score the predicted labels against the ground truth: accuracy (1 - error rate),
# normalized mutual information, and adjusted Rand index.
accuracy = 1 - err_rate(labels, labels_pred)
print("Accuracy: ", accuracy)
nmi_score = nmi(labels, labels_pred, average_method="geometric")
print("NMI: ", nmi_score)
ari_score = ari(labels, labels_pred)
print("ARI: ", ari_score)

Accuracy:  0.68125
NMI:  0.8796923866842551
ARI:  0.6115330143438436
