In [5]:
# Essential libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from tqdm.notebook import tqdm
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from scipy.misc import imresize  # resize images
import copy
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
import math

In [6]:
# Essential functions
def loadDataset(path, num_per_class, classes):
    data = []
    labels = []
    
    for id, class_name in classes.items():
        img_path_class = glob.glob(path + class_name + '/*.jpg')
        
        if num_per_class > 0:
            img_path_class = img_path_class[:num_per_class]
        
        labels.extend([id]*len(img_path_class))
        
        for filename in img_path_class:
            data.append(cv2.imread(filename, 0))
            
    return data, labels

def computeSIFT(data):
    x = []
    
    for i in tqdm(range(0, len(data))):
        sift = cv2.xfeatures2d.SIFT_create()
        img = data[i]
        step_size = 15
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size)
              for y in range(0, img.shape[1], step_size)]
        dense_feat = sift.compute(img, kp)
        x.append(dense_feat[1])
        
    return x

def extract_denseSIFT(img):
    DSIFT_STEP_SIZE = 2
    sift = cv2.xfeatures2d.SIFT_create()
    disft_step_size = DSIFT_STEP_SIZE
    keypoints = [cv2.KeyPoint(x, y, disft_step_size)
            for y in range(0, img.shape[0], disft_step_size)
                for x in range(0, img.shape[1], disft_step_size)]

    descriptors = sift.compute(img, keypoints)[1]
    
    #keypoints, descriptors = sift.detectAndCompute(gray, None)
    return descriptors

def SPM(L, img, kmeans, k):
    W = img.shape[1]
    H = img.shape[0]   
    h = []
    for l in (range(L+1)):
        w_step = math.floor(W/(2**l))
        h_step = math.floor(H/(2**l))
        x, y = 0, 0
        for i in range(1,2**l + 1):
            x = 0
            for j in range(1, 2**l + 1):                
                desc = extract_denseSIFT(img[y:y+h_step, x:x+w_step])                
                #print("type:",desc is None, "x:",x,"y:",y, "desc_size:",desc is None)
                predict = kmeans.predict(desc)
                histo = np.bincount(predict, minlength=k).reshape(1,-1).ravel()
                weight = 2**(l-L)
                h.append(weight*histo)
                x = x + w_step
            y = y + h_step
            
    hist = np.array(h).ravel()
    # normalize hist
    dev = np.std(hist)
    hist -= np.mean(hist)
    hist /= dev
    return hist

# get histogram representation for training/testing data
def getHistogramSPM(L, data, kmeans, k):    
    x = []
    for i in range(len(data)):        
        hist = SPM(L, data[i], kmeans, k)        
        x.append(hist)
    return np.array(x)

## Load Images

In [7]:
class_names = [name.split('\\')[1] for name in glob.glob('101_ObjectCategories/*')]
class_names = dict(zip(range(0,len(class_names)), class_names))

# for key, value in class_names.items():
#     print(key, ':', value)
numOfClasses = len(class_names)
print("Number of Classes:", numOfClasses)

# load training dataset
data, label = loadDataset('101_ObjectCategories/*', 80, class_names)

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size = 5/8, random_state = 42)
print(f"x shape: {len(X_train)} | y shape: {len(X_test)}")

Number of Classes: 102


### Spatial Pyramid Matching

In [None]:
# extract dense sift features from training images
x_train_Computed = computeSIFT(X_train)
x_test_Computed = computeSIFT(X_test)

all_train_desc = []
for i in tqdm(range(len(x_train_Computed))):
    for j in range(x_train_Computed[i].shape[0]):
        all_train_desc.append(x_train_Computed[i][j,:])

all_train_descriptors = np.array(all_train_desc)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2291.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3820.0), HTML(value='')))

In [None]:
k = 16
kmeans = KMeans(n_clusters=k, random_state=0).fit(all_train_desc)

In [6]:
train_histo = getHistogramSPM(0, X_train, kmeans, k)
test_histo = getHistogramSPM(0, X_test, kmeans, k)
clf = LinearSVC(random_state=0, C=0.0005)
clf.fit(train_histo, y_train)
predict = clf.predict(test_histo)

In [7]:
print ("Accuracy:", np.mean(predict == y_test)*100, "%")

Accuracy: 33.76237623762376 %
