## Load Coil20 data
## Extract DSIFT features

In [1]:
import matlab.engine
# Start a MATLAB engine session; `eng` is reused by the later SSC_modified calls.
eng = matlab.engine.start_matlab()
# Point MATLAB's working directory at the SSC_ADMM_v1.1 folder (relative path).
# As the last expression of the cell, the value returned by eng.cd is displayed.
eng.cd("./SSC_ADMM_v1.1")

'C:\\Users\\aleks_000\\Desktop\\Mentorship\\!GitHub'

In [2]:
from load import load_Coil20
from img2matrix import single_img2dsift

# The Coil20 files were renamed from 1, 2, ... to 01, 02, ... so that they
# come back in order here.
images_raw, labels = load_Coil20()

# One DSIFT result per raw image, kept in the same order as images_raw.
images_dsift = list(map(single_img2dsift, images_raw))


Loading Coil20...
----------------
Elapsed: 3.75 sec


In [None]:
import matplotlib.pyplot as plt

# First sample side by side: raw pixels (left) and DSIFT features (right).
fig, ax = plt.subplots(1, 2)

for panel, sample in zip(ax, (images_raw[0], images_dsift[0])):
    # -1 lets NumPy infer the number of rows for a fixed width of 128.
    panel.imshow(sample.reshape((-1, 128)))

In [None]:
import numpy as np

# Mean over all samples: raw pixels (left) and DSIFT features (right).
fig, ax = plt.subplots(1, 2)

for panel, stack in zip(ax, (images_raw, images_dsift)):
    panel.imshow(np.mean(stack, axis=0).reshape((-1, 128)))

In [None]:
# Mean over one run of 72 consecutive samples, selected by idx
# (presumably the 72 views of a single object - confirm against load_Coil20).
fig, ax = plt.subplots(1, 2)
idx = 0

first, last = 72 * idx, 72 * (idx + 1)
for panel, stack in zip(ax, (images_raw, images_dsift)):
    panel.imshow(np.mean(stack[first:last], axis=0).reshape((-1, 128)))

In [None]:
# Standard deviation over all samples: raw pixels (left), DSIFT features (right).
fig, ax = plt.subplots(1, 2)

for panel, stack in zip(ax, (images_raw, images_dsift)):
    panel.imshow(np.std(stack, axis=0).reshape((-1, 128)))

In [7]:
from visualize import tSNE_2D, tSNE_3D

In [8]:
# One row per raw image; -1 lets NumPy infer the flattened length.
images_raw_flat = images_raw.reshape(len(images_raw), -1)
# tSNE_2D(images_raw_flat, labels)
# tSNE_3D(images_raw_flat, labels)

In [9]:
# tSNE_2D(images_dsift, labels)
# tSNE_3D(images_dsift, labels)

## Perform PCA

In [253]:
from sklearn.decomposition import PCA
import numpy as np
    
# Project the DSIFT features onto 1000 principal components without whitening.
# random_state=0 fixes the seed handed to the ARPACK solver.
pca = PCA(n_components=1000, whiten=False, svd_solver='arpack', random_state=0)
images_pca = pca.fit_transform(images_dsift)

# Last expression of the cell: displays the shape of the projected data.
images_pca.shape

(1440, 1000)

In [11]:
import matplotlib.pyplot as plt

def display_image(image, height, width):
    """Print the value range of ``image`` and show it as a ``height`` x ``width`` picture.

    Parameters
    ----------
    image : array-like
        Values to display. When both dimensions are non-negative it must hold
        exactly ``height * width`` elements.
    height, width : int
        Target shape handed to ``reshape`` before plotting. A negative value is
        passed through unchanged so NumPy can infer that dimension.

    Raises
    ------
    ValueError
        If the number of elements does not match ``height * width``.
    """
    pixels = np.asarray(image)
    # Fail with a readable message instead of NumPy's generic reshape error;
    # stale grid sizes are the usual way this function gets mis-called.
    if height >= 0 and width >= 0 and pixels.size != height * width:
        raise ValueError(
            "cannot display %d values as a %d x %d image (needs %d values)"
            % (pixels.size, height, width, height * width)
        )
    print(np.min(pixels), np.max(pixels))
    plt.imshow(pixels.reshape((height, width)))

In [None]:
# PCA scores of the first sample laid out on a 25 x 40 grid (= 1000 values).
# These are reduced parameters, so the picture is not supposed to look like anything.
display_image(images_pca[0], height=25, width=40)

In [None]:
# Per-component mean over all samples. images_pca has 1000 columns, so it is
# drawn on the same 25 x 40 grid as the single-sample view (15 x 20 only
# holds 300 values and makes reshape fail).
display_image(np.mean(images_pca, axis=0), 25, 40)

In [None]:
# Per-component standard deviation over all samples. images_pca has 1000
# columns, so it is drawn on a 25 x 40 grid (15 x 20 only holds 300 values
# and makes reshape fail).
display_image(np.std(images_pca, axis=0), 25, 40)

In [15]:
# tSNE_2D(images_pca, labels)
# tSNE_3D(images_pca, labels)

## Normalize PCA output

In [316]:
# Normalization scope ("type"):
#   feature-wise - statistics are taken over one pixel across all images
#   image-wise   - statistics are taken over all pixels of one image
#   all          - statistics are taken over all pixels of all images
#
# Normalization method:
#   standard    - mean is set to 0, std is set to 1
#   [-1, 1]     - min and max are used to linearly map the data range to [-1, 1]
#   unit-vector - values are divided by their euclidean norm
#
# Only the "all, [-1, 1]" variant is active. The alternatives stay commented
# out, with the [BAD] marks carried over from the original notes.
# NOTE(review): the section heading mentions PCA output, but the active variant
# reads images_raw_flat - confirm which input is intended.

#[BAD] feature, standard
#images_norm = (images_pca - np.mean(images_pca, axis=0)) / np.std(images_pca, axis=0)
# image, standard
#images_norm = (images_pca - np.mean(images_pca, axis=1)[:, np.newaxis]) / np.std(images_pca, axis=1)[:, np.newaxis]
# all, standard
#images_norm = (images_pca - np.mean(images_pca)) / np.std(images_pca)


#[BAD] feature, [-1, 1]
#mmin = np.min(images_pca, axis=0)
#mmax = np.max(images_pca, axis=0)
#[BAD] image, [-1, 1]
#mmin = np.min(images_pca, axis=1)[:, np.newaxis]
#mmax = np.max(images_pca, axis=1)[:, np.newaxis]
# all, [-1, 1]
mmin = images_raw_flat.min()
mmax = images_raw_flat.max()
# FOR ALL: (2x - max - min) / (max - min), evaluated in float64
images_norm = (images_raw_flat.astype('float64') * 2 - mmax - mmin) / (mmax - mmin)

#[BAD] feature, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca, axis=0))
# image, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca, axis=1))[:, np.newaxis]
# all, unit
#images_norm = images_pca / np.sqrt(np.sum(images_pca*images_pca))

In [None]:
import matplotlib.pyplot as plt

# First normalized sample drawn as 128 x 128.
# Original note: "these are reduced parameters - not supposed to look like anything".
# NOTE(review): the active normalization reads images_raw_flat, so this may
# simply be the rescaled raw image - confirm.
display_image(images_norm[0], height=128, width=128)

In [None]:
# Per-feature mean of the normalized data. Rows of images_norm are displayed
# as 128 x 128 elsewhere in this notebook, so the mean uses the same grid
# (15 x 20 only holds 300 values and makes reshape fail).
display_image(np.mean(images_norm, axis=0), 128, 128)

In [None]:
# Per-feature standard deviation of the normalized data. Rows of images_norm
# are displayed as 128 x 128 elsewhere in this notebook, so the same grid is
# used here (15 x 20 only holds 300 values and makes reshape fail).
display_image(np.std(images_norm, axis=0), 128, 128)

In [20]:
# tSNE_2D(images_norm, labels)
# tSNE_3D(images_norm, labels)

In [21]:
# from scipy.io import savemat

# savemat('./saved/raw/Coil20.mat', mdict={'X':images_raw, 'Y':labels})
# savemat('./saved/processed/Coil20.mat', mdict={'X':images_norm, 'Y':labels})

## Hyperparameters

In [260]:
# Weights later passed to dsc.DeepSubspaceClustering as lambda1 / lambda2 / lambda3.
lamb1, lamb2, lamb3 = 0.0, 0.00508823385491712, 0.0

## Calculate C matrix

In [None]:
import numpy as np
import supporting_files.sda as sda

from supporting_files.helpers import optimize
from scipy.io import savemat, loadmat

In [None]:
# Matlab SSC #1
# Write the normalized data to temp.mat under key 'X' for the MATLAB routine.
savemat('./temp.mat', mdict={'X': images_norm})
# Number of distinct labels.
k = len(np.unique(labels))
alpha = 99999.0
maxIter = 63
# NOTE(review): the positional arguments of SSC_modified are defined in the
# MATLAB source, which is not visible here - confirm their meaning there.
eng.SSC_modified(k, 0, False, alpha, False, 1, 1e-20, maxIter, False, 0)
# Read matrix 'C' back from temp.mat (presumably written by SSC_modified - verify).
C = loadmat("./temp.mat")['C']

In [None]:
# Top-left 110 x 110 corner of C, then the mean of the squared entries of all of C.
c_corner = C[:110, :110]
display_image(c_corner, height=110, width=110)
print(np.mean(np.square(C)))

In [None]:
# Compare one normalized sample (top) with its self-expressed version C @ X
# (bottom) and report the mean squared difference over the whole data set.
fig, ax = plt.subplots(2)

index = 0
# Computed once and reused for both the plot and the error.
images_selfexpr = np.matmul(C, images_norm)
# Rows of images_norm are displayed as 128 x 128 elsewhere in this notebook;
# the previous (10, 30) shape only holds 300 values and makes reshape fail.
ax[0].imshow(images_norm[index].reshape((128, 128)))
ax[1].imshow(images_selfexpr[index].reshape((128, 128)))
print(np.mean(np.square(images_norm - images_selfexpr)))

## Train Autoencoder

In [295]:
import dsc
import importlib
# Re-execute the dsc module so edits to dsc.py take effect without restarting
# the kernel; as the last expression, the reloaded module object is displayed.
importlib.reload(dsc)

<module 'dsc' from 'C:\\Users\\aleks_000\\Desktop\\Mentorship\\!GitHub\\dsc.py'>

In [None]:
%%time
# Build the DeepSubspaceClustering model on the normalized data.
# trainC and C are forwarded to the constructor and read again by later cells.
trainC = False
# NOTE(review): this rebinds C and discards the matrix loaded from temp.mat in
# the "Matlab SSC #1" cell. With trainC False, later cells set trained_C = C,
# i.e. None - confirm that this is intended.
C = None
# NOTE(review): the meaning of hidden_dims / weight_init_params entries is
# defined in dsc.py, which is not visible here - confirm there.
d = dsc.DeepSubspaceClustering(images_norm, C=C, trainC=trainC, hidden_dims=[500, 300, 500],
                               lambda1=lamb1, lambda2=lamb2, lambda3=lamb3, weight_init='sda-normal',
                               weight_init_params=[1000, 0.005, images_norm.shape[0], 50],
                               optimizer='Adam', decay='sqrt', sda_optimizer='Adam', sda_decay='sqrt')

In [None]:
%%time
d.optimizer = optimize(d.cost, 0.0001, 'Adam', 'sqrt', d.global_step)
d.train(batch_size=images_norm.shape[0], epochs=1000, print_step=50)
images_HM2 = d.result
images_HM = d.reconstr
if(trainC):
    trained_C = np.float64(d.outC)
else:
    trained_C = C

In [None]:
# lambda3 - regularization on trained_C
# trained_C is None when trainC is False and no initial C was supplied;
# slicing None would raise, so only plot when there is a matrix to show.
if trained_C is not None:
    display_image(trained_C[:100, :100], 100, 100)
else:
    print("trained_C is None - nothing to display")

In [None]:
# First row of images_HM2 laid out on a 15 x 20 grid.
display_image(images_HM2[0], height=15, width=20)

In [None]:
# lambda1 - self-expressiveness
# trained_C is None when trainC is False and no initial C was supplied;
# np.matmul(None, ...) would raise, so skip the check in that case.
if trained_C is not None:
    # Computed once and reused for both the plot and the error.
    images_HM2_selfexpr = np.matmul(trained_C, images_HM2)
    # The product has as many columns as images_HM2, so it uses the same
    # 15 x 20 grid as the images_HM2[0] view (10 x 15 only holds 150 values).
    display_image(images_HM2_selfexpr[0], 15, 20)
    print(np.mean(np.square(images_HM2 - images_HM2_selfexpr)))
else:
    print("trained_C is None - self-expressiveness error not computed")

In [None]:
# Per-feature mean of images_HM2, on the same 15 x 20 grid used for a single
# images_HM2 row (10 x 15 only holds 150 values).
display_image(np.mean(images_HM2, axis=0), 15, 20)

In [None]:
# Per-feature standard deviation of images_HM2, on the same 15 x 20 grid used
# for a single images_HM2 row (10 x 15 only holds 150 values).
display_image(np.std(images_HM2, axis=0), 15, 20)

In [324]:
# tSNE_2D(images_HM2, labels)
# tSNE_3D(images_HM2, labels)

In [None]:
# AE Reconstruction
# Left: normalized input sample, right: autoencoder output for the same sample.
fig, ax = plt.subplots(1, 2)

index = 0
for panel, batch in zip(ax, (images_norm, images_HM)):
    panel.imshow(batch[index].reshape((128, 128)))

In [269]:
def _undo_range_scaling(scaled):
    """Invert the [-1, 1] min/max scaling using the notebook-level mmin / mmax."""
    return ((scaled * (mmax - mmin)) + mmax + mmin) / 2


# NOTE(review): mmin / mmax come from images_raw_flat while pca was fitted on
# images_dsift - confirm that both inputs have the number of columns that
# pca.inverse_transform expects.
reconstr = pca.inverse_transform(_undo_range_scaling(images_HM))
pca_reconstr = pca.inverse_transform(_undo_range_scaling(images_norm))

In [None]:
# AE + invPCA Reconstruction
# Left to right: DSIFT input, inverse PCA of the normalized data, inverse PCA
# of the autoencoder output - all for the same sample.
fig, ax = plt.subplots(1, 3)

index = 0
for panel, batch in zip(ax, (images_dsift, pca_reconstr, reconstr)):
    panel.imshow(batch[index].reshape((-1, 128)))

In [271]:
# Only for Coil20 non-dsift

# from scipy.io import savemat

# images_raw_duck = images_raw[720:792]
# images_HM2_duck = images_HM2[720:792]
# reconstr_duck = reconstr[720:792]
# pca_duck = images_norm[720:792]
# pca_reconstr_duck = pca_reconstr[720:792]
# savemat("../!Important/figures/AE_big", mdict={'H0': images_raw_duck, 'HM2': images_HM2_duck, 'HM': reconstr_duck,
#                                                'PCA': pca_duck, 'PCAr': pca_reconstr_duck})

In [346]:
# Matlab SSC #2
# Cluster with the MATLAB routine, exchanging data through temp.mat.
k = len(np.unique(labels))
alpha = 99999.0
maxIter = 63
if trainC:
    # Hand over the matrix learned during training (trained_C, already cast to
    # float64) rather than the initial C that was passed to the constructor.
    # NOTE(review): assumes SSC_modified reads 'C' from temp.mat when its last
    # argument is true - confirm in the MATLAB source.
    savemat('./temp.mat', mdict={'C': trained_C})
else:
    # No learned C: let MATLAB work from the autoencoder output instead.
    savemat('./temp.mat', mdict={'X': images_HM2})
grps = eng.SSC_modified(k, 0, False, alpha, False, 1, 1e-20, maxIter, True, 0, trainC)
# Read matrix 'C' back from temp.mat after the MATLAB call.
C_after = loadmat("./temp.mat")['C']
# Flatten the returned groups into a 1-D int32 label vector.
labels_pred = np.asarray(grps, dtype=np.int32).flatten()

In [None]:
# Top-left 100 x 100 corner of the matrix read back after the second SSC call.
c_after_corner = C_after[:100, :100]
display_image(c_after_corner, height=100, width=100)

In [246]:
# tSNE_2D(images_HM2, labels_pred)
# tSNE_3D(images_HM2, labels_pred)

## Evaluate SSC clustering (accuracy, NMI, ARI)

In [354]:
from supporting_files.ji_zhang import err_rate
from sklearn.metrics import normalized_mutual_info_score as nmi
from sklearn.metrics import adjusted_rand_score as ari

# Score the predicted labels against the ground truth, printing each metric
# as soon as it is computed.
accuracy = 1 - err_rate(labels, labels_pred)
print("Accuracy: ", str(accuracy))

nmi_score = nmi(labels, labels_pred, average_method="geometric")
print("NMI: ", str(nmi_score))

ari_score = ari(labels, labels_pred)
print("ARI: ", str(ari_score))

Accuracy:  0.6097222222222223
NMI:  0.7218195104183601
ARI:  0.5069687480066944
