In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet18
import os
import xml.etree.ElementTree as ET
import cv2
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
from torchsummary import  summary

In [None]:
# Lookup from integer class index to breed identifier string.
# NOTE(review): presumably mirrors the sorted sub-folder names under the
# dataset root, which is how CustomDataset numbers its classes - verify.
labmap = dict(enumerate([
    "n02089078-black-and-tan_coonhound",
    "n02091831-Saluki",
    "n02092002-Scottish_deerhound",
    "n02095314-wire-haired_fox_terrier",
]))

class CustomDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``root_dir`` per class.

    Class names are the (non-hidden) sub-directory names of ``root_dir``;
    a class's index is its position in the sorted list of those names.
    Indexing the dataset returns ``(image, class_index)``, where the image is
    opened with PIL, converted to RGB and passed through ``transform`` when
    one is given.

    Args:
        root_dir: Directory whose immediate sub-directories each contain the
            image files of one class.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only real, non-hidden sub-directories are classes. Stray files or
        # tool folders (.DS_Store, .ipynb_checkpoints, ...) at the root would
        # otherwise be numbered as classes and crash load_images().
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.images = self.load_images()

    def load_images(self):
        """Return a list of ``(image_path, class_index)`` for every class file.

        Files are listed in sorted order so sample order does not depend on
        the filesystem; hidden entries and nested directories are skipped.
        """
        images = []
        for class_name in self.classes:
            class_path = os.path.join(self.root_dir, class_name)
            for filename in sorted(os.listdir(class_path)):
                image_path = os.path.join(class_path, filename)
                if filename.startswith('.') or not os.path.isfile(image_path):
                    continue
                images.append((image_path, self.class_to_idx[class_name]))
        return images

    def __len__(self):
        """Number of ``(image_path, class_index)`` samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        img_path, label = self.images[idx]
        # convert() returns a fully loaded copy, so the underlying file can be
        # closed right away instead of waiting for garbage collection.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label


# Root directory holding one sub-folder of images per class.
root_folder = '../DataSet/ProcessedDatasets/'

# Resize to the 224x224 input used below, convert to a tensor, then apply the
# per-channel ImageNet mean/std normalisation that torchvision's pretrained
# classification weights expect. Without it the extracted features are
# computed on out-of-distribution inputs.
transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                     std=[0.229, 0.224, 0.225])])

dog_dataset = CustomDataset(root_folder, transform=transform)

# NOTE: this shuffled loader is rebuilt (batch size 32, no shuffling) in the
# feature-extraction cell before it is ever iterated.
batch_size = 16
data_loader = DataLoader(dog_dataset, batch_size=batch_size, shuffle=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pre-trained ResNet-18 model.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=`; kept as-is for compatibility with the installed version -
# confirm which version this notebook targets.
resnet_model = resnet18(pretrained=True)
# Drop the final fully connected layer so the network outputs pooled features
# instead of class logits.
resnet_model = torch.nn.Sequential(*(list(resnet_model.children())[:-1]))
resnet_model = resnet_model.to(device)
# Evaluation mode: batch-norm layers use their running statistics.
resnet_model.eval()


In [None]:

# torchsummary creates its dummy input on `device` (default "cuda"); pass the
# device the model actually lives on so this also works on CPU-only machines.
summary(resnet_model, (3, 224, 224),
        device=next(resnet_model.parameters()).device.type)


In [None]:
batch_size = 32
# shuffle=False keeps features and labels in dataset order.
data_loader = DataLoader(dog_dataset, batch_size=batch_size, shuffle=False)

# Run inference on whichever device the model was placed on.
device = next(resnet_model.parameters()).device

# Extract features using the pre-trained ResNet-18 model
all_features = []
all_labels = []

with torch.no_grad():
    for images, labels in data_loader:
        features = resnet_model(images.to(device))
        # Flatten everything except the batch axis. squeeze() would also drop
        # the batch axis when the last batch holds a single image, and the
        # concatenation below would then fail on mismatched dimensions.
        all_features.append(torch.flatten(features, start_dim=1).cpu().numpy())
        # Labels never need to visit the GPU; they are only collected.
        all_labels.append(labels.cpu().numpy())

# Concatenate the per-batch arrays into (n_samples, n_features) / (n_samples,)
all_features = np.concatenate(all_features, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

In [None]:
# Project the extracted features onto their first two principal components;
# every clustering and plot below works on this 2-D representation.
# random_state pins the projection when scikit-learn selects its randomized
# SVD solver, so re-running the notebook reproduces the same embedding.
pca = PCA(n_components=2, random_state=42)
principalComponents_dog = pca.fit_transform(all_features)

In [None]:
from sklearn.cluster import BisectingKMeans, Birch, KMeans, SpectralClustering

N_CLUSTERS = 4      # number of clusters requested from every method below
RANDOM_STATE = 42   # fixed seed so cluster assignments are reproducible

# (a) K-means clustering with random centroid initialisation.
# n_init is spelled out because its default differs between scikit-learn
# releases; 10 restarts is the long-standing behaviour.
kmeans = KMeans(n_clusters=N_CLUSTERS, init='random', n_init=10,
                random_state=RANDOM_STATE)
kmeans_labels = kmeans.fit_predict(principalComponents_dog)

# (b) KMeans with init='k-means++'
kmeans_pp = KMeans(n_clusters=N_CLUSTERS, init='k-means++', n_init=10,
                   random_state=RANDOM_STATE)
kmeans_pp_labels = kmeans_pp.fit_predict(principalComponents_dog)

# (c) Bisecting K-means. This step previously fitted Birch, which is a
# different algorithm; BisectingKMeans (scikit-learn >= 1.1) is the estimator
# the section title and plot refer to.
bisecting_kmeans = BisectingKMeans(n_clusters=N_CLUSTERS,
                                   random_state=RANDOM_STATE)
bisecting_kmeans_labels = bisecting_kmeans.fit_predict(principalComponents_dog)

# (d) Spectral clustering
spectral_clustering = SpectralClustering(n_clusters=N_CLUSTERS,
                                         random_state=RANDOM_STATE)
spectral_labels = spectral_clustering.fit_predict(principalComponents_dog)

# Plot the results: one panel per method, points coloured by assigned cluster.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
panels = [
    ('K-means Clustering', kmeans_labels),
    ('KMeans with init=\'k-means++\'', kmeans_pp_labels),
    ('Bisecting K-means', bisecting_kmeans_labels),
    ('Spectral Clustering', spectral_labels),
]
for ax, (title, cluster_labels) in zip(axes.ravel(), panels):
    ax.scatter(principalComponents_dog[:, 0], principalComponents_dog[:, 1],
               c=cluster_labels, cmap='viridis')
    ax.set(title=title, xlabel='PC 1', ylabel='PC 2')

fig.tight_layout()
plt.show()


In [None]:
from sklearn.cluster import DBSCAN, AgglomerativeClustering
from sklearn.metrics import fowlkes_mallows_score, silhouette_score

# (e) DBSCAN on the 2-D PCA projection.
dbscan = DBSCAN(min_samples=5, eps=0.5)
dbscan_labels = dbscan.fit_predict(principalComponents_dog)

# (f)-(i) Agglomerative clustering, one model per linkage criterion, in order:
#   single   -> (f) Single link (MIN)
#   complete -> (g) Complete link (MAX)
#   average  -> (h) Group Average
#   ward     -> (i) Ward's method
single_link, complete_link, group_average, ward = (
    AgglomerativeClustering(n_clusters=4, linkage=criterion)
    for criterion in ('single', 'complete', 'average', 'ward')
)

# Fit each model on the same projection and keep its cluster assignment.
single_link_labels, complete_link_labels, group_average_labels, ward_labels = (
    model.fit_predict(principalComponents_dog)
    for model in (single_link, complete_link, group_average, ward)
)

# Clustering evaluation metrics
def evaluate_clustering(labels, true_labels, data=None):
    """Score a cluster assignment against ground truth and on cohesion.

    Args:
        labels: Cluster label assigned to each sample.
        true_labels: Ground-truth class label of each sample.
        data: Sample coordinates used for the silhouette coefficient.
            Defaults to the notebook's 2-D PCA projection
            ``principalComponents_dog``.

    Returns:
        ``(fowlkes_mallows, silhouette)``. ``silhouette`` is ``nan`` when the
        assignment has fewer than 2 or more than ``n_samples - 1`` distinct
        labels, because the coefficient is undefined there and
        ``silhouette_score`` would raise.
    """
    if data is None:
        data = principalComponents_dog

    fowlkes_mallows = fowlkes_mallows_score(true_labels, labels)

    # A degenerate labelling (e.g. DBSCAN marking every point as noise) must
    # not abort the whole evaluation cell.
    n_distinct = np.unique(labels).size
    if 2 <= n_distinct <= len(labels) - 1:
        silhouette = silhouette_score(data, labels)
    else:
        silhouette = float('nan')

    return fowlkes_mallows, silhouette

# Reference labels: the class indices gathered alongside the extracted features.
true_labels = all_labels

# Score the DBSCAN assignment -> (Fowlkes-Mallows, Silhouette)
dbscan_scores = evaluate_clustering(labels=dbscan_labels,
                                    true_labels=true_labels)



In [None]:

# Evaluate Agglomerative clustering methods -> (Fowlkes-Mallows, Silhouette)
single_link_scores = evaluate_clustering(single_link_labels, true_labels)
complete_link_scores = evaluate_clustering(complete_link_labels, true_labels)
group_average_scores = evaluate_clustering(group_average_labels, true_labels)
ward_scores = evaluate_clustering(ward_labels, true_labels)

# One row per method: (name in the score print-out, name in the rankings, scores)
# NOTE(review): the K-means / k-means++ / bisecting / spectral assignments
# computed earlier are not scored here - confirm whether they should be.
scored_methods = [
    ("DBSCAN", "DBSCAN", dbscan_scores),
    ("Single Link", "Single Link", single_link_scores),
    ("Complete Link", "Complete Link", complete_link_scores),
    ("Group Average", "Group Average", group_average_scores),
    ("Ward", "Ward's Method", ward_scores),
]

# Print the evaluation scores
for print_name, _, scores in scored_methods:
    print(f"{print_name} Scores (Fowlkes-Mallows, Silhouette):", scores)


def _rank_by(metric_index):
    """Return ``(score, method)`` pairs sorted best-first for one metric.

    ``metric_index`` is 0 for Fowlkes-Mallows and 1 for Silhouette. NaN scores
    (an undefined metric) are ranked last instead of leaving their position
    to NaN comparison semantics; finite scores sort exactly like plain
    ``(score, name)`` tuples in descending order.
    """
    def sort_key(entry):
        score, name = entry
        return (-np.inf if np.isnan(score) else score, name)

    pairs = [(scores[metric_index], rank_name)
             for _, rank_name, scores in scored_methods]
    return sorted(pairs, key=sort_key, reverse=True)


def _print_ranking(heading, ranking):
    """Print a numbered, best-first list of method names under ``heading``."""
    print(f"\n{heading}")
    for rank, (score, name) in enumerate(ranking, 1):
        suffix = " (score undefined)" if np.isnan(score) else ""
        print(f"{rank}. {name}{suffix}")


# Rank methods based on Fowlkes-Mallows index
methods_fm_rank = _rank_by(0)

# Rank methods based on Silhouette Coefficient
methods_silhouette_rank = _rank_by(1)

# Print the rankings
_print_ranking("Rankings based on Fowlkes-Mallows Index:", methods_fm_rank)
_print_ranking("Rankings based on Silhouette Coefficient:", methods_silhouette_rank)