In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import mobilenet, resnet, densenet, efficientnet, vgg19
from tensorflow.keras.models import Model
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN, MeanShift, estimate_bandwidth, AffinityPropagation
import hdbscan
from sklearn import metrics
import umap

2023-12-06 17:17:52.995378: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-06 17:17:53.087100: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-06 17:17:53.459319: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/zach/miniconda3/envs/tf/lib/:/home/zach/miniconda3/envs/tf/lib/python3.10/site-packages/nvidia/cudnn/lib
2023-12-06 17:17:53.459364: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could no

In [2]:
def load_and_preprocess_image(img_path, preprocess_input_func, target_size=(224, 224)):
    img = image.load_img(img_path, target_size=target_size)
    img = image.img_to_array(img)
    img = preprocess_input_func(img)
    return np.expand_dims(img, axis=0)

In [3]:
def create_feature_model(model_fn, preprocess_input_func):
    base_model = model_fn(weights='imagenet', include_top=False)
    feature_model = Model(inputs=base_model.input, outputs=base_model.output)
    return feature_model, preprocess_input_func

In [4]:
def extract_features_from_folder(folder_path, model_fn, preprocess_input_func):
    feature_model, preprocess_input = create_feature_model(model_fn, preprocess_input_func)
    features = []
    for img_name in os.listdir(folder_path):
        if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(folder_path, img_name)
            img = load_and_preprocess_image(img_path, preprocess_input)
            feature = feature_model.predict(img)
            features.append(feature.flatten())
    return np.array(features)

In [5]:
models = {
    'mobilenet': (mobilenet.MobileNet, mobilenet.preprocess_input),
    'resnet': (resnet.ResNet50, resnet.preprocess_input),
    'densenet': (densenet.DenseNet121, densenet.preprocess_input),
    'efficientnet': (efficientnet.EfficientNetB0, efficientnet.preprocess_input),
    'vgg19': (vgg19.VGG19, vgg19.preprocess_input),
}

img_dir = 'dataset/objects/train_0'

In [6]:
all_features = {}
for model_name, (model_fn, preprocess_input_func) in models.items():
    all_features[model_name] = extract_features_from_folder(img_dir, model_fn, preprocess_input_func)



2023-12-06 17:17:56.392439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-06 17:17:56.407497: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-06 17:17:56.407658: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-06 17:17:56.408040: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags



2023-12-06 17:17:59.144426: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




In [7]:
reduced_features = {}
for model_name, features in all_features.items():
    features = StandardScaler().fit_transform(features)
    
    pca = PCA(n_components=0.9)
    reduced_features[(model_name, 'pca')] = pca.fit_transform(features)

    tsne = TSNE(n_components=2)
    reduced_features[(model_name, 'tsne')] = tsne.fit_transform(features)

    umap_reducer = umap.UMAP(n_neighbors=15, n_components=2)
    reduced_features[(model_name, 'umap')] = umap_reducer.fit_transform(features)


In [8]:
def calculate_accuracy(ground_truth_clusters, predicted_clusters):
    unique_predicted_clusters = len(set(predicted_clusters)) - (1 if -1 in predicted_clusters else 0)
    accuracy = unique_predicted_clusters / ground_truth_clusters
    return accuracy

In [9]:
results = {}
ground_truth_clusters = 10 

for (model_name, reduction_technique), features in reduced_features.items():
    # DBSCAN
    db = DBSCAN(eps=0.5, min_samples=5).fit(features)
    db_labels = db.labels_

    # HDBSCAN
    hdb = hdbscan.HDBSCAN(min_cluster_size=5).fit(features)
    hdb_labels = hdb.labels_

    # Mean Shift
    bandwidth = estimate_bandwidth(features, quantile=0.2)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit(features)
    ms_labels = ms.labels_

    # Affinity Propagation
    af = AffinityPropagation().fit(features)
    af_labels = af.labels_

    # Store results
    for algorithm, labels in [('dbscan', db_labels), ('hdbscan', hdb_labels), 
                              ('meanshift', ms_labels), ('affinity', af_labels)]:
        key = (model_name, reduction_technique, algorithm)
        silhouette = metrics.silhouette_score(features, labels) if len(set(labels)) > 1 else -1
        results[key] = {'silhouette_score': silhouette}

        # accuracy = calculate_accuracy(ground_truth_clusters, labels)
        # results[key] = {'silhouette_score': silhouette, 'accuracy': accuracy}

In [11]:
results_df = pd.DataFrame.from_dict(results, orient='index').reset_index()
results_df.columns = ['Model', 'Reduction Technique', 'Algorithm', 'Silhouette Score']  #, 'Accuracy'
results_df.sort_values(by='Silhouette Score', ascending=False, inplace=True)

results_df.reset_index(drop=True, inplace=True)
results_df.index += 1

results_df['Rank'] = results_df.index

print(results_df.head())

          Model Reduction Technique  Algorithm  Silhouette Score  Rank
1      densenet                umap    hdbscan          0.428840     1
2      densenet                umap   affinity          0.424968     2
3  efficientnet                umap  meanshift          0.385708     3
4        resnet                umap  meanshift          0.384535     4
5  efficientnet                umap   affinity          0.381132     5


In [13]:
try:
    results_df.head().to_csv('permutation_results.csv', mode='a', index=False, header=False)

except:
    results_df.head().to_csv('permutation_results.csv', mode='w+', index=False, header=True)