# Please run the code and check the output folders 'clusterResnet' and 'clusterSIFT' for the results!

In [2]:
import os
import cv2
import numpy as np
import shutil
import tensorflow as tf
from sklearn.cluster import KMeans, MeanShift, estimate_bandwidth

2021-12-18 16:51:00.400789: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/varun/.local/lib/python3.9/site-packages/cv2/../../lib64:
2021-12-18 16:51:00.400815: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [7]:
!git clone https://github.com/openMVG/SfM_quality_evaluation

Cloning into 'SfM_quality_evaluation'...
remote: Enumerating objects: 237, done.[K
remote: Total 237 (delta 0), reused 0 (delta 0), pack-reused 237[K
Receiving objects: 100% (237/237), 254.31 MiB | 19.43 MiB/s, done.
Resolving deltas: 100% (9/9), done.
Updating files: 100% (214/214), done.


### 1. Preparing Dataset

In [119]:
desired_folders = ['fountain-P11', 'Herz-Jesus-P8', 'entry-P10','castle-P19']

data_path = 'SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008'

if not os.path.exists('dataset'):
    os.mkdir('dataset')

for folder in desired_folders:
    images_path = os.path.join(os.path.join(data_path, folder),'images')
    for img in os.listdir(images_path):
        if img.endswith('.jpg'):
            shutil.copyfile(os.path.join(images_path, img),os.path.join('dataset',"%s-%s"%(folder,img)))

In [3]:
dataset_images = [cv2.imread("dataset/"+i) for i in os.listdir('dataset')]
dataset_images = [cv2.resize(i, (224,224))/255.0 for i in dataset_images]
dataset_images = np.array(dataset_images)

### 2. Creating SIFT data for Clustering

In [4]:
sift_data = []
sift = cv2.SIFT_create(3500)

img_labels = []

for i in os.listdir('dataset'):
    grayImg = cv2.cvtColor(cv2.imread('dataset/'+i), cv2.COLOR_BGR2GRAY)
    img_labels.append(i)
    kp, des = sift.detectAndCompute(grayImg, None)
    des = des[:3500]
    des = des.flatten()
    sift_data.append(des)

sift_data = np.array(sift_data)

#### 2.1 Clustering using KMeans and Storing the images in respective Cluster Folder

In [5]:
kmeans_sift = KMeans(4, max_iter=1000).fit(sift_data)
kmeans_sift.labels_

array([1, 2, 0, 0, 2, 2, 0, 1, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, 2, 3, 0, 1,
       2, 2, 1, 0, 1, 3, 1, 1, 3, 1, 2, 2, 3, 1, 0, 3, 1, 0, 0, 1, 3, 1,
       2, 0, 0, 1], dtype=int32)

In [6]:
if os.path.exists('clusterSIFT'):
    shutil.rmtree('clusterSIFT')
os.mkdir('clusterSIFT')

for ind, lb in enumerate(kmeans_sift.labels_):

    if not os.path.exists('clusterSIFT/'+str(lb)):
        os.mkdir('clusterSIFT/%s'%lb)
    
    shutil.copyfile('dataset/'+img_labels[ind], 'clusterSIFT/%s/%s'%(lb,img_labels[ind]))



### 3. Creating ResNEt Data for Clustering

In [7]:
resnetModel = tf.keras.applications.ResNet50V2(input_shape=(224,224,3), weights='imagenet', include_top=False)
resnetOutput = tf.keras.layers.Flatten()(resnetModel.output)
resnetModel = tf.keras.models.Model(inputs=resnetModel.input, outputs=resnetOutput)

resnet_data = resnetModel.predict(dataset_images)

2021-12-18 16:52:51.714494: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/varun/.local/lib/python3.9/site-packages/cv2/../../lib64:
2021-12-18 16:52:51.714543: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-18 16:52:51.714579: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (VarunUbuntu): /proc/driver/nvidia/version does not exist
2021-12-18 16:52:51.714849: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### 3.1 Clustering using KMeans and storing data in respective folders

In [8]:
kmeans_resnet = KMeans(4, max_iter=1000).fit(resnet_data)
kmeans_resnet.labels_

array([1, 1, 3, 3, 3, 3, 3, 0, 1, 3, 0, 3, 2, 1, 1, 2, 2, 3, 3, 2, 1, 0,
       3, 1, 2, 3, 3, 3, 0, 0, 2, 0, 3, 1, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0,
       3, 3, 3, 0], dtype=int32)

In [9]:
if os.path.exists('clusterResnet'):
    shutil.rmtree('clusterResnet')
os.mkdir('clusterResnet')

for ind, lb in enumerate(kmeans_resnet.labels_):

    if not os.path.exists('clusterResnet/'+str(lb)):
        os.mkdir('clusterResnet/%s'%lb)
    
    shutil.copyfile('dataset/'+img_labels[ind], 'clusterResnet/%s/%s'%(lb,img_labels[ind]))



#### Failed classification for Mean Shift Algorithm

In [10]:
mean_shift = MeanShift(bandwidth=estimate_bandwidth(sift_data, quantile=0.2),max_iter=1000, bin_seeding=True)
mean_shift.fit(sift_data)
mean_shift.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

## Conclusion

So based on the results obtained from above, we can see that KMeans is performing well in clustering compared to the Mean Shift Algorithm, but using the SIFT data to cluster brings some erroneous clusters which are not of same category. But while using a pretrained ResNet model for predicting image descriptors, we were able to succesfully cluster them into respective groups. 