In [None]:
from keras.datasets import cifar10

import joblib
import numpy as np
import cv2 as cv
from skimage.feature import hog

# visualization
%matplotlib inline
import matplotlib.pyplot as plt
from tabulate import tabulate

from sklearn.preprocessing import MinMaxScaler, StandardScaler, minmax_scale, scale
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

# models
from sklearn.cluster import MiniBatchKMeans
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# BoVW (Bag of Visual Words)
Dataset: <a href='https://www.cs.toronto.edu/~kriz/cifar.html' target="_blank"> CIFAR-10 </a> <br/>
Classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck

## 1. Load Data

In [None]:
# Load the CIFAR-10 train/test splits (images in X_*, labels in Y_*) through keras.
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

In [None]:
# Force both image splits to 8-bit unsigned integers
# (presumably so the OpenCV calls below receive 8-bit images -- TODO confirm).
X_train, X_test = X_train.astype('uint8'), X_test.astype('uint8')

In [None]:
# Quick sanity check: number of samples per split and the per-image shape
# (axes 1..3 of the training array).
print(f'Total train data: {X_train.shape[0]}')
print(f'Total test data: {X_test.shape[0]}')
print(f'Image dimensions: {X_train.shape[1], X_train.shape[2], X_train.shape[3]}')

In [None]:
# Display one training image as a visual sanity check.
# The explicit Axes API plus plt.show() keeps the cell output to the figure only
# (no stray Text(...) repr from the last expression).
fig, ax = plt.subplots()
ax.imshow(X_train[4])
ax.set_title('RGB Image')
plt.show()

## 2. Preprocessing

### 2.1 Grayscale

In [None]:
def to_grayscale(images):
    """ Converts RGB images to grayscale.

    Args:
        images:
            Numpy array of images in RGB format, indexed as
            (image, row, column, channel).

    Returns:
        Numpy uint8 array of grayscale images with the same first three
        dimensions as the input.
    """
    # Allocate the output directly as uint8; building a float64 array first and
    # casting it afterwards needs a temporary eight times the size of the result.
    grayscale = np.zeros(images.shape[:3], dtype=np.uint8)

    for img_idx in range(images.shape[0]):
        grayscale[img_idx] = cv.cvtColor(images[img_idx], cv.COLOR_RGB2GRAY)

    return grayscale

In [None]:
# Grayscale versions of both splits; every feature extractor below works on these.
X_train_grayscale, X_test_grayscale = (
    to_grayscale(split) for split in (X_train, X_test)
)

In [None]:
# After conversion the channel axis is gone: only (rows, columns) remain per image.
print(f'Image dimensions in grayscale: {X_train_grayscale.shape[1], X_train_grayscale.shape[2]}')

In [None]:
# Same sample image as in the RGB preview, now in grayscale.
# The explicit Axes API plus plt.show() keeps the cell output to the figure only.
fig, ax = plt.subplots()
ax.imshow(X_train_grayscale[4], cmap='gray')
ax.set_title('Grayscale Image')
plt.show()

## 3. Features

* HOG
* SIFT
* Gray Color Histogram

### 3.1 SIFT

In [None]:
def sift_features(images):
    """ Extracts image keypoints and descriptors using SIFT.

    Args:
        images:
            Numpy array of images in grayscale format.

    Returns:
        keypoints and descriptors as lists, with one entry per image. Each entry is
        whatever ``detectAndCompute`` returns for that image; the descriptor entry
        may be None (downstream code checks for this).
    """
    # Prefer the SIFT factory in the main cv2 module and fall back to the
    # xfeatures2d (contrib) location, so the notebook runs on either kind of build.
    create_sift = getattr(cv, 'SIFT_create', None)
    if create_sift is None:
        create_sift = cv.xfeatures2d.SIFT_create
    sift = create_sift(contrastThreshold=0.02, sigma=0.9)

    keypoints = []
    descriptors = []

    for img_idx in range(images.shape[0]):
        kp, des = sift.detectAndCompute(images[img_idx], None)
        keypoints.append(kp)
        descriptors.append(des)

    return keypoints, descriptors

In [None]:
# SIFT keypoints and raw descriptors for every image of both splits.
# des_sift_* hold per-image descriptor sets here; a later cell rebinds the same
# names to BoVW histograms.
kp_sift_train, des_sift_train = sift_features(X_train_grayscale) 
kp_sift_test, des_sift_test = sift_features(X_test_grayscale)

In [None]:
# Overlay the detected SIFT keypoints on the sample image.
img = cv.drawKeypoints(X_train_grayscale[4], kp_sift_train[4], None)

# The explicit Axes API plus plt.show() keeps the cell output to the figure only.
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title('SIFT keypoints')
plt.show()

### 3.2 HOG

In [None]:
def hog_features(images):
    """ Computes one HOG (histogram of oriented gradients) descriptor per image.

    Args:
        images:
            Numpy array of images in grayscale format.

    Returns:
        List with one flattened HOG feature vector per image, in input order.
    """
    # Shared HOG configuration for every image.
    hog_params = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
        visualize=False,
    )

    return [hog(image, **hog_params) for image in images]

In [None]:
# HOG descriptors for both splits (one feature vector per image).
des_hog_train, des_hog_test = (
    hog_features(split) for split in (X_train_grayscale, X_test_grayscale)
)

### 3.3 Gray Color Histogram (GCH)

In [None]:
def color_histogram(images):
    """ Computes a 256-bin gray-level histogram for every image.

    Args:
        images:
            Numpy array of images in grayscale format.

    Returns:
        List with one flattened 256-bin histogram per image, in input order.
    """
    # One bin per value in [0, 256) on channel 0, without a mask.
    return [
        cv.calcHist([image], [0], None, [256], [0, 256]).ravel()
        for image in images
    ]

In [None]:
# Gray-level histograms for both splits (one 256-bin vector per image).
color_hist_train, color_hist_test = (
    color_histogram(split) for split in (X_train_grayscale, X_test_grayscale)
)

## 4. Bag of Visual Words

### 4.1 Visual Vocabulary

In [None]:
def get_visual_words(descriptors, n_clusters=8, n_init=10, max_iter=300,
                     batch_size=64, random_state=None):
    """ Finds the vocabulary of visual words.

    To find the vocabulary of visual words, mini-batch k-means is used. The centroids
    found by k-means form the vocabulary.

    Args:
        descriptors:
            descriptors, as a list of numpy arrays (one array per image). Entries that
            are None or empty are skipped.
        n_clusters:
            The number of clusters to form as well as the number of centroids to
            generate, as int (default=8).
        n_init:
            Number of times the k-means algorithm will be run with different centroid
            seeds, as int (default=10).
        max_iter:
            Maximum number of iterations of the k-means algorithm for a single run,
            as int (default=300).
        batch_size:
            Size of the mini batches, as int (default=64).
        random_state:
            Seed forwarded to the clustering model; pass an int to get a repeatable
            vocabulary (default=None, i.e. unseeded).

    Returns:
        Fitted k-means, holding the vocabulary of visual words.

    Raises:
        ValueError: if no image contributes any descriptor.
    """
    usable = [des for des in descriptors if des is not None and len(des) > 0]
    if not usable:
        raise ValueError('No descriptors available to build the visual vocabulary.')

    # Stack once into a single 2-D array instead of growing a Python list that
    # holds one small array object per descriptor row.
    descriptors_raw = np.vstack(usable)

    kmeans = MiniBatchKMeans(n_clusters=n_clusters, batch_size=batch_size, n_init=n_init,
                             max_iter=max_iter, random_state=random_state)

    visual_words = kmeans.fit(descriptors_raw)

    return visual_words

In [None]:
# Build a 500-word visual vocabulary from the training-set SIFT descriptors only.
# NOTE(review): a later cell rebinds des_sift_train to BoVW histograms, so this cell
# must run before that one; re-running it afterwards would cluster histograms instead.
visual_words = get_visual_words(descriptors=des_sift_train, n_clusters=500)

### 4.2 Vector Representations

In [None]:
def get_vector_representation(visual_words, descriptors):
    """ Computes the vector representation of images.

    The representation is based on the image descriptors and a predefined BoVW model.
    Specifically, it's a histogram of the frequencies of visual words from the
    vocabulary of the BoVW.

    Args:
        visual_words:
            precomputed k-means model with the vocabulary of visual words. It must
            expose ``cluster_centers_`` and ``predict``.
        descriptors:
            descriptors, as list of numpy arrays (one array per image). An entry may
            be None or empty when an image has no descriptors.

    Returns:
        Vector representations of images, as list of numpy arrays. The list always
        has exactly one histogram per input entry; images without descriptors get an
        all-zero histogram so the indices stay aligned with the labels and with the
        other per-image features.
    """
    n_words = visual_words.cluster_centers_.shape[0]

    histograms = []
    for desc in descriptors:
        if desc is None or len(desc) == 0:
            # Keep a placeholder so that later per-index fusion does not shift.
            histograms.append(np.zeros(n_words))
            continue

        # Assign all descriptors of the image to their nearest visual word in one
        # call, then count how often each word occurs.
        word_ids = np.asarray(visual_words.predict(np.asarray(desc, dtype=float))).ravel()
        histograms.append(np.bincount(word_ids, minlength=n_words).astype(float))

    return histograms

In [None]:
# Replace each image's raw SIFT descriptors with its visual-word histogram.
# NOTE(review): this rebinds des_sift_train / des_sift_test to the histograms, so the
# cell is not idempotent -- before re-running it (or the vocabulary cell) the SIFT
# extraction cell has to be run again.
des_sift_train = get_vector_representation(visual_words, des_sift_train)
des_sift_test = get_vector_representation(visual_words, des_sift_test)

## 5. Early Fusion

### 5.1 Early Fusion: HOG + GCH

In [None]:
# Early fusion: per image, append the gray-level histogram to the HOG vector.
X_train_hog_gch = [
    np.concatenate((des_hog_train[idx], color_hist_train[idx]), axis=None)
    for idx in range(X_train_grayscale.shape[0])
]

X_test_hog_gch = [
    np.concatenate((des_hog_test[idx], color_hist_test[idx]), axis=None)
    for idx in range(X_test_grayscale.shape[0])
]

In [None]:
# Length of one fused HOG + gray-histogram vector.
print(f'Number of features: {X_train_hog_gch[0].shape[0]}')

### 5.2 Early Fusion: HOG + SIFT

In [None]:
# Early fusion: per image, append the BoVW (SIFT) histogram to the HOG vector.
X_train_hog_sift = [
    np.concatenate((des_hog_train[idx], des_sift_train[idx]), axis=None)
    for idx in range(X_train_grayscale.shape[0])
]

X_test_hog_sift = [
    np.concatenate((des_hog_test[idx], des_sift_test[idx]), axis=None)
    for idx in range(X_test_grayscale.shape[0])
]

In [None]:
# Length of one fused HOG + BoVW (SIFT) vector.
print(f'Number of features: {X_train_hog_sift[0].shape[0]}')

### 5.3 Early Fusion: HOG + GCH + SIFT

In [None]:
# Early fusion of all three descriptors, flattened in the order HOG, gray histogram,
# BoVW (SIFT) histogram.
X_train_hog_gch_sift = [
    np.concatenate(
        (des_hog_train[idx], color_hist_train[idx], des_sift_train[idx]), axis=None
    )
    for idx in range(X_train_grayscale.shape[0])
]

X_test_hog_gch_sift = [
    np.concatenate(
        (des_hog_test[idx], color_hist_test[idx], des_sift_test[idx]), axis=None
    )
    for idx in range(X_test_grayscale.shape[0])
]

In [None]:
# Length of one fused HOG + gray-histogram + BoVW (SIFT) vector.
print(f'Number of features: {X_train_hog_gch_sift[0].shape[0]}')

## 6. Scaling

### 6.1 Minmax Scaling

In [None]:
# Learn the per-feature min/max on the training split only and apply that same
# mapping to the test split. Scaling each split with its own statistics would put
# train and test features on different scales. Test values may therefore fall
# slightly outside [0, 1].
minmax_scaler = MinMaxScaler()
X_train_scaled = minmax_scaler.fit_transform(np.array(X_train_hog_sift))
X_test_scaled = minmax_scaler.transform(np.array(X_test_hog_sift))

### 6.2 Standardize

In [None]:
# Learn the per-feature mean/std on the training split only and apply that same
# transform to the test split, so both splits share one scale.
standard_scaler = StandardScaler()
X_train_stand = standard_scaler.fit_transform(np.array(X_train_hog_sift))
X_test_stand = standard_scaler.transform(np.array(X_test_hog_sift))

## 7. Classification

### 7.1 Logistic Regression

In [None]:
# hyperparameter space
space = {
    'C': [0.01, 0.1, 1, 10],
    'class_weight': [None],
    'max_iter': [5000] 
}

clf = LogisticRegression()

In [None]:
# hyperparameter tunning & cross validation
search = GridSearchCV(clf, space, scoring="accuracy", cv=10)
results = search.fit(X_train_hog_sift, Y_train.ravel())

print(results.best_score_)
print(results.best_params_)

In [None]:
# Final logistic-regression model.
# NOTE(review): the hyperparameters are hard-coded, presumably copied from the grid
# search output above -- verify against results.best_params_ after re-running it.
clf = LogisticRegression(C=10, class_weight=None, max_iter=5000)

# Labels are flattened with ravel() before being passed to fit / accuracy_score.
clf.fit(X_train_hog_sift, Y_train.ravel())

Y_train_pred = clf.predict(X_train_hog_sift)
Y_test_pred = clf.predict(X_test_hog_sift)

print(f'Train accuracy: {accuracy_score(Y_train.ravel(), Y_train_pred)}')
print(f'Test accuracy: {accuracy_score(Y_test.ravel(), Y_test_pred)}')

### 7.2 kNN

In [None]:
# hyperparameter space
space = {
    'n_neighbors': list(range(1, 20, 3)),
    'weights': ['uniform', 'distance']
}

clf = KNeighborsClassifier()

In [None]:
# hyperparameter tunning & cross validation
search = GridSearchCV(clf, space, scoring="accuracy", cv=10)
results = search.fit(X_train_hog_sift, Y_train.ravel())

print(results.best_score_)
print(results.best_params_)

In [None]:
# Final kNN model.
# NOTE(review): the hyperparameters are hard-coded, presumably copied from the grid
# search output above -- verify against results.best_params_ after re-running it.
clf = KNeighborsClassifier(n_neighbors=19, weights='distance')

clf.fit(X_train_hog_sift, Y_train.ravel())

# NOTE(review): with weights='distance' every training sample is its own
# zero-distance neighbour, so the train accuracy below is presumably close to 1.0
# and says little about generalisation -- rely on the test accuracy.
Y_train_pred = clf.predict(X_train_hog_sift)
Y_test_pred = clf.predict(X_test_hog_sift)

print(f'Train accuracy: {accuracy_score(Y_train.ravel(), Y_train_pred)}')
print(f'Test accuracy: {accuracy_score(Y_test.ravel(), Y_test_pred)}')

### 7.3 Random Forest

In [None]:
# hyperparameter space
max_depth = (np.linspace(10, 50, num = 5).astype(int)).tolist()
max_depth.append(None)
space = {
    'n_estimators': list(range(10, 101, 10)),
    'max_depth': max_depth ,
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 3, 5],
}

clf = RandomForestClassifier()

In [None]:
# hyperparameter tunning & cross validation
search = GridSearchCV(clf, space, scoring="accuracy", cv=10)
results = search.fit(X_train_hog_sift, Y_train.ravel())

print(results.best_score_)
print(results.best_params_)

In [None]:
# Final random-forest model.
# NOTE(review): the hyperparameters are hard-coded, presumably copied from the grid
# search output above -- verify against results.best_params_ after re-running it.
# random_state is fixed so the reported accuracies are reproducible.
clf = RandomForestClassifier(n_estimators=100, max_depth=None, min_samples_split=2,
                             min_samples_leaf=1, random_state=42)

clf.fit(X_train_hog_sift, Y_train.ravel())

Y_train_pred = clf.predict(X_train_hog_sift)
Y_test_pred = clf.predict(X_test_hog_sift)

print(f'Train accuracy: {accuracy_score(Y_train.ravel(), Y_train_pred)}')
print(f'Test accuracy: {accuracy_score(Y_test.ravel(), Y_test_pred)}')