In [1]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as img
import os
from matplotlib import pyplot
import seaborn as sns
from collections import Counter

from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, cross_val_predict, train_test_split
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score, classification_report

from sklearn.cluster import KMeans
import pandas as pd

In [2]:
all_images = []
all_image_files = []

In [3]:
dataset_folder_1 = "Datasets/coil-20/coil-20-proc"
image_files_1 = os.listdir(dataset_folder_1)

for each_image in image_files_1:
    if each_image.endswith(".png"):
        im = img.imread(f"{dataset_folder_1}/{each_image}")
        all_images.append(im)

In [4]:
all_image_files = image_files_1

In [5]:
y_true = [int(file.split("__")[0].replace("obj","")) for file in image_files_1]

In [6]:
X = np.vstack([img.flatten() for img in all_images])

In [7]:
X = pd.DataFrame(X)

# Add clusters information to use

In [8]:
kmeans = KMeans(n_clusters = 20, init = "k-means++", random_state = 42,n_init=10, algorithm='elkan')
kmeans.fit(X)
cluster_labels = kmeans.predict(X)

In [9]:
X['clusters'] = cluster_labels

# Train test Split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.30, random_state=101, stratify=y_true)

In [11]:
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.50, random_state=101, stratify=y_test)

# Evaluate Model Performance

In [12]:
def evaluate_perf(y_test, y_pred):
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    report = classification_report(y_test, y_pred)
    print (f"""Recall: {recall},
precision: {precision},
f1: {f1},
Clf Report: 
{report}
""")

In [13]:
from sklearn.neural_network import MLPClassifier

# 1. Without Reducing Dimensionality

## a. Without using Cluster information

In [14]:
mlp = MLPClassifier()
mlp.fit(X_train.iloc[:,:-1], y_train)

In [15]:
y_pred = mlp.predict(X_test.iloc[:,:-1])
evaluate_perf(y_test, y_pred)

Recall: 0.9953703703703703,
precision: 0.9953703703703703,
f1: 0.9953703703703703,
Clf Report: 
              precision    recall  f1-score   support

           1       0.92      1.00      0.96        11
           2       1.00      1.00      1.00        11
           3       1.00      1.00      1.00        10
           4       1.00      1.00      1.00        11
           5       1.00      1.00      1.00        11
           6       1.00      1.00      1.00        11
           7       1.00      1.00      1.00        11
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        11
          10       1.00      1.00      1.00        11
          11       1.00      1.00      1.00        11
          12       1.00      1.00      1.00        11
          13       1.00      1.00      1.00        11
          14       1.00      1.00      1.00        11
          15       1.00      1.00      1.00        11
          16       1.00      1.00      

## b. Without using Cluster information

In [16]:
mlp = MLPClassifier()
mlp.fit(X_train.values, y_train)

In [17]:
y_pred = mlp.predict(X_test.values)
evaluate_perf(y_test, y_pred)

Recall: 0.9861111111111112,
precision: 0.9861111111111112,
f1: 0.9861111111111112,
Clf Report: 
              precision    recall  f1-score   support

           1       0.79      1.00      0.88        11
           2       1.00      1.00      1.00        11
           3       1.00      1.00      1.00        10
           4       1.00      1.00      1.00        11
           5       1.00      1.00      1.00        11
           6       1.00      1.00      1.00        11
           7       1.00      1.00      1.00        11
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        11
          10       1.00      1.00      1.00        11
          11       1.00      1.00      1.00        11
          12       1.00      1.00      1.00        11
          13       1.00      1.00      1.00        11
          14       1.00      1.00      1.00        11
          15       1.00      1.00      1.00        11
          16       1.00      1.00      

# 2. Reduce Dimensions using Linear Method

In [None]:
n_components_range = range(0, 100, 10)  # Adjust the maximum value as needed

cv_scores = {}

for n_components in n_components_range:
    if n_components:
        print (n_components)
        fa = FactorAnalysis(n_components=n_components)
        scores = cross_val_score(fa, X_train.values, cv=3, n_jobs=-1)  # Adjust the number of folds (cv) as needed
        cv_scores[n_components] = np.mean(scores)

# Find the value of n_components with the highest cross-validation score
optimal_n_components = max(cv_scores, key=cv_scores.get)
print("Optimal value of n_components:", optimal_n_components)

10


In [20]:
from sklearn.decomposition import FactorAnalysis
from sklearn.model_selection import cross_val_score
import numpy as np

## a. Without Cluster Information

In [21]:
fa = FactorAnalysis(n_components=64)  # Choose the desired number of components

# Fit the Factor Analysis model to the image data
X_train_fa = fa.fit_transform(X_train.iloc[:,:-1].values) 
X_test_fa = fa.transform(X_test.iloc[:,:-1].values)

In [24]:
mlp = MLPClassifier()
mlp.fit(X_train_fa, y_train)
y_pred = mlp.predict(X_test_fa)
evaluate_perf(y_test, y_pred)

Recall: 0.9675925925925926,
precision: 0.9675925925925926,
f1: 0.9675925925925926,
Clf Report: 
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       0.92      1.00      0.96        11
           3       0.83      1.00      0.91        10
           4       0.92      1.00      0.96        11
           5       0.89      0.73      0.80        11
           6       1.00      0.82      0.90        11
           7       0.92      1.00      0.96        11
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        11
          10       1.00      1.00      1.00        11
          11       1.00      1.00      1.00        11
          12       1.00      1.00      1.00        11
          13       1.00      1.00      1.00        11
          14       1.00      1.00      1.00        11
          15       1.00      1.00      1.00        11
          16       1.00      1.00      

## b. With Cluster Information

In [25]:
fa = FactorAnalysis(n_components=64)  # Choose the desired number of components

# Fit the Factor Analysis model to the image data
X_train_fa = fa.fit_transform(X_train.values)
X_test_fa = fa.transform(X_test.values)

In [26]:
mlp = MLPClassifier()
mlp.fit(X_train_fa, y_train)
y_pred = mlp.predict(X_test_fa)
evaluate_perf(y_test, y_pred)

Recall: 0.9675925925925926,
precision: 0.9675925925925926,
f1: 0.9675925925925926,
Clf Report: 
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        11
           3       0.77      1.00      0.87        10
           4       1.00      1.00      1.00        11
           5       0.90      0.82      0.86        11
           6       1.00      0.82      0.90        11
           7       0.92      1.00      0.96        11
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        11
          10       1.00      1.00      1.00        11
          11       1.00      1.00      1.00        11
          12       1.00      1.00      1.00        11
          13       1.00      1.00      1.00        11
          14       0.92      1.00      0.96        11
          15       1.00      1.00      1.00        11
          16       1.00      1.00      

# Reduce Dimensions with Non-Linear Method

In [29]:
from sklearn.manifold import TSNE

## a. Without using Cluster information

In [38]:
n_components = 3  # Choose the desired number of components
tsne = TSNE(n_components=n_components, random_state=42)
X_tsne_train = tsne.fit_transform(X_train.iloc[:,:-1].values)
X_tsne_test = tsne.fit_transform(X_test.iloc[:,:-1].values)

In [39]:
mlp = MLPClassifier()
mlp.fit(X_tsne_train, y_train)
y_pred = mlp.predict(X_tsne_test)
evaluate_perf(y_test, y_pred)

Recall: 0.023148148148148147,
precision: 0.023148148148148147,
f1: 0.023148148148148147,
Clf Report: 
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        11
           2       0.00      0.00      0.00        11
           3       0.57      0.40      0.47        10
           4       0.00      0.00      0.00        11
           5       0.00      0.00      0.00        11
           6       0.00      0.00      0.00        11
           7       0.00      0.00      0.00        11
           8       0.00      0.00      0.00        10
           9       0.06      0.09      0.07        11
          10       0.00      0.00      0.00        11
          11       0.00      0.00      0.00        11
          12       0.00      0.00      0.00        11
          13       0.00      0.00      0.00        11
          14       0.00      0.00      0.00        11
          15       0.00      0.00      0.00        11
          16       0.00      0.00



## b. With using Cluster Information

In [40]:
n_components = 3  # Choose the desired number of components
tsne = TSNE(n_components=n_components, random_state=42)
X_tsne_train = tsne.fit_transform(X_train.values)
X_tsne_test = tsne.fit_transform(X_test.values)

In [41]:
mlp = MLPClassifier()
mlp.fit(X_tsne_train, y_train)
y_pred = mlp.predict(X_tsne_test)
evaluate_perf(y_test, y_pred)

Recall: 0.046296296296296294,
precision: 0.046296296296296294,
f1: 0.046296296296296294,
Clf Report: 
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        11
           2       0.03      0.09      0.05        11
           3       0.00      0.00      0.00        10
           4       0.00      0.00      0.00        11
           5       0.00      0.00      0.00        11
           6       0.00      0.00      0.00        11
           7       0.09      0.09      0.09        11
           8       0.00      0.00      0.00        10
           9       0.15      0.18      0.17        11
          10       0.00      0.00      0.00        11
          11       0.00      0.00      0.00        11
          12       0.00      0.00      0.00        11
          13       0.00      0.00      0.00        11
          14       0.67      0.55      0.60        11
          15       0.00      0.00      0.00        11
          16       0.00      0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
