In [1]:
# import packages here
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
from tqdm.notebook import tqdm

In [2]:
class_names = [name.split('\\')[1] for name in glob.glob('101_ObjectCategories/*')]
class_names = dict(zip(range(0,len(class_names)), class_names))

numOfClasses = len(class_names)
print("Number of Classes:", numOfClasses)

def loadDataset(path, num_per_class, classes):
    data = []
    labels = []
    
    for id, class_name in classes.items():
        img_path_class = glob.glob(path + class_name + '/*.jpg')
        
        if num_per_class > 0:
            img_path_class = img_path_class[:num_per_class]
        
        labels.extend([id]*len(img_path_class))
        
        for filename in img_path_class:
            data.append(cv2.imread(filename, 0))
            
    return data, labels

# load training dataset
data, label = loadDataset('101_ObjectCategories/*', 80, class_names)

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size = 5/8, random_state = 42)
print(f"x shape: {len(X_train)} | y shape: {len(X_test)}")

Number of Classes: 102
x shape: 2291 | y shape: 3820


### Spatial Pyramid Matching

In [3]:
import math

def extract_denseSIFT(img):
    DSIFT_STEP_SIZE = 2
    sift = cv2.xfeatures2d.SIFT_create()
    disft_step_size = DSIFT_STEP_SIZE
    keypoints = [cv2.KeyPoint(x, y, disft_step_size)
            for y in range(0, img.shape[0], disft_step_size)
                for x in range(0, img.shape[1], disft_step_size)]

    descriptors = sift.compute(img, keypoints)[1]
    
    #keypoints, descriptors = sift.detectAndCompute(gray, None)
    return descriptors

def getImageFeaturesSPM(L, img, kmeans, k):
    W = img.shape[1]
    H = img.shape[0]   
    h = []
    for l in (range(L+1)):
        w_step = math.floor(W/(2**l))
        h_step = math.floor(H/(2**l))
        x, y = 0, 0
        for i in range(1,2**l + 1):
            x = 0
            for j in range(1, 2**l + 1):                
                desc = extract_denseSIFT(img[y:y+h_step, x:x+w_step])                
                #print("type:",desc is None, "x:",x,"y:",y, "desc_size:",desc is None)
                predict = kmeans.predict(desc)
                histo = np.bincount(predict, minlength=k).reshape(1,-1).ravel()
                weight = 2**(l-L)
                h.append(weight*histo)
                x = x + w_step
            y = y + h_step
            
    hist = np.array(h).ravel()
    # normalize hist
    dev = np.std(hist)
    hist = hist - np.mean(hist)
    hist = hist - dev
    return hist

# get histogram representation for training/testing data
def getHistogramSPM(L, data, kmeans, k):    
    x = []
    for i in range(len(data)):        
        hist = getImageFeaturesSPM(L, data[i], kmeans, k)        
        x.append(hist)
    return np.array(x)

In [4]:
# compute dense SIFT 
def computeSIFT(data):
    x = []
    for i in range(len(data)):
        sift = cv2.xfeatures2d.SIFT_create()
        img = data[i]
        step_size = 15
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size) for y in range(0, img.shape[1], step_size)]
        dense_feat = sift.compute(img, kp)
        x.append(dense_feat[1])
        
    return x

# extract dense sift features from training images
x_train_SIFT = computeSIFT(X_train)
# x_test_SIFT = computeSIFT(X_test)

In [5]:
all_train_desc = []
for i in tqdm(range(len(x_train_SIFT))):
    for j in range(x_train_SIFT[i].shape[0]):
        all_train_desc.append(x_train_SIFT[i][j,:])

all_train_desc = np.array(all_train_desc)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2291.0), HTML(value='')))




In [6]:
def clusterFeatures(all_train_desc, k):
    kmeans = KMeans(n_clusters=k, random_state=0).fit(all_train_desc)
    return kmeans

In [7]:
levels = range(0, 4)

In [8]:
kmeans16 = clusterFeatures(all_train_desc, 16)

def trainSPMWeak(L):
    train_histo = getHistogramSPM(L, X_train, kmeans16, 16)
    test_histo = getHistogramSPM(L, X_test, kmeans16, 16)
    
    # train SVM
    clf = LinearSVC(random_state=0, C=5E-4)
    clf.fit(train_histo, y_train)
    predict = clf.predict(test_histo)
    print (f"Level: {L} | Accuracy: {np.mean(predict == y_test)*100}%")

for lvl in tqdm(levels):
    trainSPMWeak(lvl)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))



Level: 0 | Accuracy: 5.314136125654451%




Level: 1 | Accuracy: 22.827225130890053%




Level: 2 | Accuracy: 34.79057591623037%
Level: 3 | Accuracy: 39.60732984293194%





In [10]:
kmeans200 = clusterFeatures(all_train_desc, 200)

def trainSPMStrong(L):
    train_histo = getHistogramSPM(L, X_train, kmeans200, 200)
    test_histo = getHistogramSPM(L, X_test, kmeans200, 200)
    
    # train SVM
    clf = LinearSVC(random_state=0, C=5E-4)
    clf.fit(train_histo, y_train)
    predict = clf.predict(test_histo)
    print (f"Level: {L} | Accuracy: {np.mean(predict == y_test)*100}%")

for lvl in tqdm(levels):
    trainSPMStrong(lvl)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))



Level: 0 | Accuracy: 24.81675392670157%




Level: 1 | Accuracy: 38.06282722513089%




Level: 2 | Accuracy: 46.62303664921466%




Level: 3 | Accuracy: 48.874345549738216%

