## **1. Import the necessary libraries**
---

In [11]:
import matplotlib
import sklearn
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
import matplotlib.pyplot as plt
import tensorflow as tf


from tensorflow.keras.callbacks import ModelCheckpoint,CSVLogger
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers



# Report the versions of the main libraries this notebook relies on.
print("Versions of key libraries")
print("---")
for _label, _module in (
    ("tensorflow: ", tf),
    ("numpy:      ", np),
    ("matplotlib: ", matplotlib),
    ("sklearn:    ", sklearn),
):
    print(_label, _module.__version__)

Versions of key libraries
---
tensorflow:  2.3.0
numpy:       1.20.3
matplotlib:  3.4.2
sklearn:     0.24.2


## **2. Create a function to plot an image without axes**
---

In [12]:
def implt(img):
    """Show `img` in a new matplotlib figure with the axes hidden.

    :param img: image data accepted by ``plt.imshow``
    :return: None
    """
    plt.figure()        # open a new figure for this image
    plt.imshow(img)
    plt.axis('off')     # hide the axis lines, ticks and labels

print(implt)

<function implt at 0x000001D28E0CFEE8>


## **3. Set matplotlib to have seaborn plot style**

In [13]:
# Matplotlib renamed its bundled seaborn style sheets to 'seaborn-v0_8*' in
# release 3.6 and dropped the old names in 3.8, so pick whichever name this
# installation actually provides.
_seaborn_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(_seaborn_style)              # if want to use the default style, set 'classic'

# Draw the y-axis ticks and tick labels on the right-hand side only.
plt.rcParams['ytick.right']     = True
plt.rcParams['ytick.labelright']= True
plt.rcParams['ytick.left']      = False
plt.rcParams['ytick.labelleft'] = False
plt.rcParams['figure.figsize']  = [7,7]   # Set the figure size to be 7 inch for (width,height)

print("Matplotlib setup completes.")

Matplotlib setup completes.


## **4. Prepare data for training and testing**
---
* Step 1: Load the images
* Step 2: Check the shape and type of the data
* Step 3: Convert the data into float32 and rescale the values from the range of 0\~255 into 0\~1
* Step 4: Retrieve the row size and the column size of each image
* Step 5: Perform one-hot encoding on the labels
* Step 6: Retrieve the number of classes in this problem

In [15]:
from imutils import paths
import os, sys
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

def collect_images_and_labels(path_to_images, target_size=(128, 128)):
    """
    Load every image found below a root folder together with its class label.

    :param path_to_images: root folder that holds one sub-folder per label; the
        sub-folder's name is used as the label of the images inside it
    :param target_size: (height, width) each image is resized to while loading
    :return: a list with the images (as arrays) and a list with the matching labels
    """
    # TODO: later on, switch to https://keras.io/api/preprocessing/image/
    data = []
    labels = []

    # Sort the paths: directory traversal order depends on the filesystem, and the
    # seeded train/test split is only reproducible when the sample order is fixed.
    for img_path in sorted(paths.list_images(path_to_images)):
        # The class label is the name of the folder that directly contains the
        # image. os.path handles mixed path separators, unlike a plain split.
        label = os.path.basename(os.path.dirname(img_path))

        # load the input image at the requested size and convert it to an array
        image = load_img(img_path, target_size=target_size)
        image = img_to_array(image)

        # update the data and labels lists, respectively
        data.append(image)
        labels.append(label)

    return data, labels

def preprocess_labels(labels):
    """
    One-hot encode class labels.

    The previous LabelBinarizer + to_categorical chain only produced a valid
    one-hot matrix for two classes; with three or more classes it returned an
    (n_samples, n_classes, 2) tensor. This version yields the same result for
    two classes and a proper one-hot matrix for any number of classes.

    :param labels: list (or array) of labels, e.g. strings
    :return: float32 np array of 0/1 with shape (n_samples, n_classes); the
        columns follow the sorted order of the distinct labels
    """
    # np.unique returns the sorted distinct labels and, for every sample, the
    # index of its label within that sorted list.
    classes, class_index = np.unique(np.asarray(labels), return_inverse=True)
    # Row i of the identity matrix is the one-hot vector of class i.
    return np.eye(len(classes), dtype='float32')[class_index]

def tts_split(data, labels, test_size=0.20, random_state=42):
    """
    Split the samples into a stratified training set and test set.

    :param data: sequence of samples
    :param labels: sequence of labels aligned with ``data``; also used to stratify
    :param test_size: share of the samples assigned to the test set
    :param random_state: seed passed to ``train_test_split``
    :return: x_train, x_test, y_train, y_test as np arrays
    """
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=test_size, stratify=labels, random_state=random_state
    )
    return np.array(x_train), np.array(x_test), np.array(y_train), np.array(y_test)


PATH_TO_IMAGE = "../data"
MODELS_PATH = "../models"

# Step 1: load every image with its folder-name label, then split into train/test
data, labels = collect_images_and_labels(PATH_TO_IMAGE)
trDat, tsDat, trLbl, tsLbl = tts_split(data, labels)   # x_train, x_test, y_train, y_test

# Step 2: inspect the raw arrays before any conversion
print("The shape of trDat is", trDat.shape, "and the type of trDat is", trDat.dtype)
print("The shape of tsDat is", tsDat.shape, "and the type of tsDat is", tsDat.dtype)
print("")
print("The shape of trLbl is", trLbl.shape, "and the type of trLbl is", trLbl.dtype)
print("The shape of tsLbl is", tsLbl.shape, "and the type of tsLbl is", tsLbl.dtype)

# Step 3: rescale the pixel values from 0~255 to 0~1
trDat           = trDat.astype('float32')/255
tsDat           = tsDat.astype('float32')/255

# Step 4: image geometry, read from the (samples, rows, columns, channels) array
imgrows         = trDat.shape[1]
imgclms         = trDat.shape[2]
channel         = trDat.shape[3]

# Step 5: encode both splits in a single call so that they share one
# label -> column mapping; encoding them separately could produce different
# column orders or widths if a class were missing from one split.
n_train         = len(trLbl)
all_lbl         = preprocess_labels(np.concatenate([trLbl, tsLbl]))
trLbl           = all_lbl[:n_train]
tsLbl           = all_lbl[n_train:]

# Step 6
num_classes     = tsLbl.shape[1]

  "Palette images with Transparency expressed in bytes should be "


The shape of trDat is (6042, 128, 128, 3) and the type of trDat is float32
The shape of tsDat is (1511, 128, 128, 3) and the type of tsDat is float32

The shape of trLbl is (6042,) and the type of trLbl is <U12
The shape of tsLbl is (1511,) and the type of tsLbl is <U12


## *KNN No Hyperparameter Tuning*
___


Build and train KNN model with default parameters

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.decomposition import PCA # Dimensionality Reduction


# Flatten every image into one feature vector, giving 2-D arrays of shape
# (n_samples, n_features). The sizes are taken from the arrays themselves
# instead of being hard-coded, so the cell keeps working when the dataset or
# the image size changes.
trDat = trDat.reshape(len(trDat), -1)
tsDat = tsDat.reshape(len(tsDat), -1)

# Initialize KNN model
knn = KNeighborsClassifier()

# Use training data to fit KNN model
knn.fit(trDat, trLbl)
                                                          

KNeighborsClassifier()

In [23]:
%%time
# Make predictions for the entire test set.
# NOTE(review): knn was fitted on trLbl; if that is still the one-hot encoded
# array, the predictions come back in the same one-hot layout — confirm.
predictions_set1 = knn.predict(tsDat)


Wall time: 7.5 s


In [24]:
import pickle
# Save the predictions in a pickle; the context manager closes the file even
# if dumping raises.
with open("predictions_knn.pickle", "wb") as pickle_out:
    pickle.dump(predictions_set1, pickle_out)

## **Performance Metrics for No Hyperparameter Tuning**

In [28]:
# Precision, recall, F1 score and classification report for the KNN model
# trained without hyperparameter tuning.
knn_precision = precision_score(tsLbl, predictions_set1, average='micro')
knn_recall = recall_score(tsLbl, predictions_set1, average='micro')
knn_f1 = f1_score(tsLbl, predictions_set1, average='micro')
knn_report = classification_report(tsLbl, predictions_set1)

print('KNN Precision: %.3f' % knn_precision)
print('KNN Recall: %.3f' % knn_recall)
print('KNN F1 Score: %.3f' % knn_f1)
print("\nNo Hyperparameter Tuning Classification Report\n", knn_report)


KNN Precision: 0.815
KNN Recall: 0.815
KNN F1 Score: 0.815

No Hyperparameter Tuning Classification Report
               precision    recall  f1-score   support

           0       0.90      0.70      0.79       745
           1       0.76      0.92      0.84       766

   micro avg       0.82      0.82      0.82      1511
   macro avg       0.83      0.81      0.81      1511
weighted avg       0.83      0.82      0.81      1511
 samples avg       0.82      0.82      0.82      1511

