# Importing the libraries

In [10]:
# Import the libraries
import zipfile
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score, classification_report
from PIL import Image
import random
import umap

# Specify the local path where the QuickDraw folder is stored
path = '/home/mapacheroja/Desktop/Tarea1/QuickDraw-10/';

# Load mapping.txt, train.txt and test.txt files
mapping_file = os.path.join(path, 'mapping.txt')
with open(mapping_file, 'r') as f:
  mapping = {line.split()[0]: int(line.split()[1]) for line in f.readlines()}

train_file = os.path.join(path, 'train.txt')
with open(train_file, 'r') as f:
  train_data = [line.strip().split() for line in f.readlines()]

test_file = os.path.join(path, 'test.txt')
with open(test_file, 'r') as f:
  test_data = [line.strip().split() for line in f.readlines()]

# Print sample of train and test data
print(f"Train Data Sample: {train_data[:5]}")
print(f"Test Data Sample: {test_data[:5]}")

Train Data Sample: [['images/sink/262_00048051.jpg', '0'], ['images/sink/262_00202339.jpg', '0'], ['images/sink/262_00043468.jpg', '0'], ['images/sink/262_00119722.jpg', '0'], ['images/sink/262_00072354.jpg', '0']]
Test Data Sample: [['images/sink/262_00143983.jpg', '0'], ['images/sink/262_00082171.jpg', '0'], ['images/sink/262_00114430.jpg', '0'], ['images/sink/262_00107634.jpg', '0'], ['images/sink/262_00119974.jpg', '0']]


In [3]:
# Number of samples to randomly select from the training data
num_samples = 1000  # Adjust this to the desired number of samples

np.random.seed(22)

# Randomly sample the indices of the training data
random_indices = np.random.choice(len(train_data), size=num_samples, replace=False)

# Select the images and labels corresponding to the random indices
train_data_subset = [train_data[i] for i in random_indices]
train_images_subset = [os.path.join(path, item[0]) for item in train_data_subset]
train_labels_subset = [item[1] for item in train_data_subset]

print(f"Subset Train Data Sample: {train_data_subset[:5]}")

Subset Train Data Sample: [['images/spreadsheet/278_00087107.jpg', '6'], ['images/sink/262_00001566.jpg', '0'], ['images/trombone/324_00106886.jpg', '9'], ['images/blackberry/033_00037365.jpg', '5'], ['images/teapot/302_00012847.jpg', '3']]


In [4]:
# Load and preprocess the images
train_images = []
for img_path in train_images_subset:
    img = Image.open(img_path).convert('L')  # Convert to grayscale
    img = img.resize((256, 256))  # Ensure size (in case needed)
    img_array = np.array(img).flatten()  # Flatten the 256x256 image into a 65536-length vector
    train_images.append(img_array)

train_images = np.array(train_images)
train_labels = np.array(train_labels_subset, dtype=int)

print(f"Shape of train_images: {train_images.shape}")
print(f"Shape of train_labels: {train_labels.shape}")

Shape of train_images: (1000, 65536)
Shape of train_labels: (1000,)


In [5]:
# Prepare test images and labels
test_images = []
test_labels = []

for item in test_data:
    img_path = os.path.join(path, item[0])  # <- no extra 'images' here
    label = item[1]
    img = Image.open(img_path).convert('L')
    img = img.resize((256, 256))
    img_array = np.array(img).flatten()
    test_images.append(img_array)
    test_labels.append(label)

test_images = np.array(test_images)
test_labels = np.array(test_labels, dtype=int)

print(f"Shape of test_images: {test_images.shape}")
print(f"Shape of test_labels: {test_labels.shape}")

Shape of test_images: (1166, 65536)
Shape of test_labels: (1166,)


In [6]:
scaler = StandardScaler()
train_images_scaled = scaler.fit_transform(train_images)
test_images_scaled = scaler.transform(test_images)

print("Ready")

Ready


In [8]:
# 1. SVM with RBK Kernel

# Train SVM with RBF kernel and raw image vectors
svm_rbf_raw = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_rbf_raw.fit(train_images_scaled, train_labels)

# Predictions and evaluation
predictions_rbf_raw = svm_rbf_raw.predict(test_images_scaled)
accuracy_rbf_raw = accuracy_score(test_labels, predictions_rbf_raw)
print(f"Accuracy with RBF kernel + Raw Vectors: {accuracy_rbf_raw:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_rbf_raw))

Accuracy with RBF kernel + Raw Vectors: 0.5961
              precision    recall  f1-score   support

           0       0.72      0.51      0.60       120
           1       0.65      0.60      0.62       122
           2       0.56      0.56      0.56       131
           3       0.62      0.73      0.67       116
           4       0.41      0.64      0.50       105
           5       0.50      0.66      0.57       103
           6       0.62      0.59      0.60       116
           7       0.78      0.46      0.58       121
           8       0.70      0.85      0.77       130
           9       0.53      0.32      0.40       102

    accuracy                           0.60      1166
   macro avg       0.61      0.59      0.59      1166
weighted avg       0.62      0.60      0.59      1166



In [7]:
# Reduce dimensionality with PCA (256 components)
pca = PCA(n_components=256)
train_images_pca = pca.fit_transform(train_images_scaled)
test_images_pca = pca.transform(test_images_scaled)

# Train SVM with RBF kernel on PCA features
svm_rbf_pca = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_rbf_pca.fit(train_images_pca, train_labels)

# Predictions and evaluation
predictions_rbf_pca = svm_rbf_pca.predict(test_images_pca)
accuracy_rbf_pca = accuracy_score(test_labels, predictions_rbf_pca)
print(f"Accuracy with RBF kernel + PCA: {accuracy_rbf_pca:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_rbf_pca))

Accuracy with RBF kernel + PCA: 0.5626
              precision    recall  f1-score   support

           0       0.69      0.54      0.61       120
           1       0.69      0.56      0.62       122
           2       0.48      0.43      0.45       131
           3       0.52      0.79      0.63       116
           4       0.38      0.81      0.52       105
           5       0.74      0.51      0.61       103
           6       0.69      0.27      0.39       116
           7       0.79      0.51      0.62       121
           8       0.61      0.85      0.71       130
           9       0.42      0.32      0.37       102

    accuracy                           0.56      1166
   macro avg       0.60      0.56      0.55      1166
weighted avg       0.60      0.56      0.55      1166



In [28]:
# Reduce dimensionality with UMAP (256 components)
umap_model = umap.UMAP(n_components=256)
train_images_umap = umap_model.fit_transform(train_images_scaled)
test_images_umap = umap_model.transform(test_images_scaled)

# Train SVM with RBF kernel on UMAP features
svm_rbf_umap = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_rbf_umap.fit(train_images_umap, train_labels)

# Predictions and evaluation
predictions_rbf_umap = svm_rbf_umap.predict(test_images_umap)
accuracy_rbf_umap = accuracy_score(test_labels, predictions_rbf_umap)
print(f"Accuracy with RBF kernel + UMAP: {accuracy_rbf_umap:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_rbf_umap))

Accuracy with RBF kernel + UMAP: 0.1973
              precision    recall  f1-score   support

           0       0.24      0.04      0.07       120
           1       0.00      0.00      0.00       122
           2       0.33      0.01      0.01       131
           3       0.00      0.00      0.00       116
           4       0.00      0.00      0.00       105
           5       0.72      0.30      0.42       103
           6       0.00      0.00      0.00       116
           7       0.00      0.00      0.00       121
           8       0.64      0.73      0.68       130
           9       0.10      0.96      0.19       102

    accuracy                           0.20      1166
   macro avg       0.20      0.20      0.14      1166
weighted avg       0.21      0.20      0.14      1166



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [29]:
# Train SVM with Linear kernel and raw image vectors
svm_sigmoid_raw = SVC(kernel='sigmoid', C=1.0)
svm_sigmoid_raw.fit(train_images_scaled, train_labels)

# Predictions and evaluation
predictions_sigmoid_raw = svm_sigmoid_raw.predict(test_images_scaled)
accuracy_sigmoid_raw = accuracy_score(test_labels, predictions_sigmoid_raw)
print(f"Accuracy with Sigmoid kernel + Raw Vectors: {accuracy_sigmoid_raw:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_sigmoid_raw))

Accuracy with Sigmoid kernel + Raw Vectors: 0.6063
              precision    recall  f1-score   support

           0       0.64      0.68      0.66       120
           1       0.58      0.57      0.58       122
           2       0.68      0.57      0.62       131
           3       0.63      0.68      0.65       116
           4       0.50      0.44      0.47       105
           5       0.67      0.64      0.66       103
           6       0.66      0.47      0.55       116
           7       0.62      0.59      0.60       121
           8       0.74      0.78      0.76       130
           9       0.39      0.62      0.48       102

    accuracy                           0.61      1166
   macro avg       0.61      0.60      0.60      1166
weighted avg       0.62      0.61      0.61      1166



In [30]:
# Train SVM with Linear kernel on PCA features
svm_sigmoid_pca = SVC(kernel='sigmoid', C=1.0)
svm_sigmoid_pca.fit(train_images_pca, train_labels)

# Predictions and evaluation
predictions_sigmoid_pca = svm_sigmoid_pca.predict(test_images_pca)
accuracy_sigmoid_pca = accuracy_score(test_labels, predictions_sigmoid_pca)
print(f"Accuracy with Sigmoid kernel + PCA: {accuracy_sigmoid_pca:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_sigmoid_pca))

Accuracy with Sigmoid kernel + PCA: 0.6021
              precision    recall  f1-score   support

           0       0.64      0.65      0.65       120
           1       0.59      0.57      0.58       122
           2       0.70      0.53      0.61       131
           3       0.65      0.66      0.66       116
           4       0.52      0.45      0.48       105
           5       0.62      0.66      0.64       103
           6       0.60      0.47      0.53       116
           7       0.63      0.60      0.61       121
           8       0.74      0.80      0.77       130
           9       0.38      0.60      0.46       102

    accuracy                           0.60      1166
   macro avg       0.61      0.60      0.60      1166
weighted avg       0.61      0.60      0.60      1166



In [31]:
# Train SVM with Polynomial kernel on UMAP features
svm_poly_umap = SVC(kernel='poly', degree=3, C=1.0, gamma='scale')  # degree=3 for cubic kernel
svm_poly_umap.fit(train_images_umap, train_labels)

# Predictions and evaluation
predictions_poly_umap = svm_poly_umap.predict(test_images_umap)
accuracy_poly_umap = accuracy_score(test_labels, predictions_poly_umap)
print(f"Accuracy with Polynomial kernel + UMAP: {accuracy_poly_umap:.4f}")

# Accuracy per class
print(classification_report(test_labels, predictions_poly_umap))

Accuracy with Polynomial kernel + UMAP: 0.3019
              precision    recall  f1-score   support

           0       0.14      0.14      0.14       120
           1       0.21      0.42      0.28       122
           2       0.32      0.05      0.08       131
           3       0.30      0.35      0.32       116
           4       0.20      0.14      0.17       105
           5       0.51      0.31      0.39       103
           6       0.18      0.14      0.16       116
           7       0.38      0.49      0.43       121
           8       0.68      0.55      0.61       130
           9       0.26      0.42      0.32       102

    accuracy                           0.30      1166
   macro avg       0.32      0.30      0.29      1166
weighted avg       0.32      0.30      0.29      1166

