# SVM Classification using Densely Sampled SIFT Features

A very useful local image descriptor is the Scale-Invariant Feature Transform (SIFT). SIFT descriptors are invariant to image scale and rotation, and robust to changes in illumination.

## Initial Setup

In [1]:
from __future__ import division
from PIL import Image
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import gist
import imageutils

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Load Images into a Matrix

In [2]:
# Root folder of the image dataset; the 'train' and 'test' splits are read
# from its sub-folders of the same names.
base_dir = 'square_images128_dsift'

# The class list and class dictionary are taken from the training split only;
# the ones returned for the test split are discarded.
X_train, y_train, classes, class_dict = imageutils.load_data(base_dir + '/train')
X_test, y_test, _, _ = imageutils.load_data(base_dir + '/test')

# Axis 1 of the image tensor is used as the height and axis 2 as the width.
image_height, image_width = X_train.shape[1], X_train.shape[2]

print(X_train.shape)
print(classes)

(1020, 128, 128, 3)
['bluebell', 'buttercup', 'colts_foot', 'cowslip', 'crocus', 'daffodil', 'daisy', 'dandelion', 'fritillary', 'iris', 'lily_valley', 'pansy', 'snowdrop', 'sunflower', 'tigerlily', 'tulip', 'windflower']


## Compute SIFT Features

In [29]:
from os.path import exists, isdir, basename, join, splitext
import sift
from glob import glob
from numpy import zeros, resize, sqrt, histogram, hstack, vstack, savetxt, zeros_like
import scipy.cluster.vq as vq
from cPickle import dump, load, HIGHEST_PROTOCOL
import numpy as np


# --- Dense-SIFT / codebook configuration -----------------------------------

# Folder holding the per-split, per-category descriptor files.
dataset_path = '../flower_rec1/square_images128_dsift'

# `size` and `step` select which descriptor files are used: they are embedded
# in the file extension (".dsift_<size>_<step>") and in the codebook file name.
# NOTE(review): presumably the dense-SIFT patch size and sampling step used
# when the descriptors were generated -- confirm against the extraction script.
size = 10
step = 5

# Embedded in the codebook file name.
# NOTE(review): presumably the number of visual words in the codebook -- confirm.
num_clusters = 300

# Not referenced by any code visible in this notebook.
K_THRESH = 1

# Pickled codebook matching the settings above.
codebook_file = "codebook_dsift_{0}_{1}_{2}.file".format(size, step, num_clusters)


def get_categories(datasetpath):
    """Return the names of the directories found directly under `datasetpath`.

    Only entries matched by ``datasetpath + "/*"`` that are directories are
    kept. The result is ordered by the sorted full paths of those directories.

    Args:
        datasetpath: folder whose immediate sub-directories are the categories.

    Returns:
        List of directory base names (empty if nothing matches).
    """
    category_dirs = sorted(entry for entry in glob(datasetpath + "/*") if isdir(entry))
    return [basename(category_dir) for category_dir in category_dirs]

def get_sift_files(path):
    """Return the dense-SIFT descriptor files located directly in `path`.

    A file qualifies when its lower-cased extension equals
    ".dsift_<size>_<step>", built from the module-level `size` and `step`.

    Args:
        path: folder to scan (not recursive).

    Returns:
        List of ``join(path, <file name>)`` entries, in the order glob yields them.
    """
    wanted_extension = ".dsift_{0}_{1}".format(size,step)
    return [
        join(path, basename(candidate))
        for candidate in glob(path + "/*")
        if splitext(candidate)[-1].lower() == wanted_extension
    ]

def computeHistograms(codebook, descriptors):
    """Quantize descriptors against a codebook and return the word histogram.

    Each descriptor is assigned to its nearest codebook entry with
    ``scipy.cluster.vq.vq``; the assignments are then counted per entry and
    normalized.

    Args:
        codebook: 2-D array with one codebook entry (visual word) per row.
        descriptors: 2-D array with one descriptor per row, same number of
            columns as `codebook`.

    Returns:
        1-D float array of length ``codebook.shape[0]`` holding, for every
        codebook entry, the fraction of descriptors assigned to it.
    """
    code, _ = vq.vq(descriptors, codebook)

    # One unit-width bin per codebook index (edges 0, 1, ..., k).
    bin_edges = np.arange(codebook.shape[0] + 1)

    # `density=True` replaces the deprecated `normed=True`, which newer NumPy
    # releases no longer accept. With unit-width bins the density equals
    # count / total, i.e. exactly what `normed=True` produced here.
    histogram_of_words, _ = histogram(code, bins=bin_edges, density=True)
    return histogram_of_words


# Load the pre-computed visual-word codebook from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file, so
# only load codebook files that were generated locally or come from a trusted
# source.
# The print call uses the parenthesized single-argument form, which behaves
# the same as the print statement on Python 2 and matches the other cells.
print("## loading codebook from " + codebook_file)
with open(codebook_file, 'rb') as f:
    codebook = load(f)


def sift_features(folder):
    """Build the bag-of-visual-words feature matrix for one dataset split.

    Every descriptor file found under ``dataset_path/<folder>/<category>/`` is
    read and quantized against the module-level `codebook`; each file
    contributes one row, its normalized visual-word histogram.

    Args:
        folder: name of the split sub-folder under `dataset_path`
            (the notebook calls this with 'train' and 'test').

    Returns:
        2-D array with one row per descriptor file, rows ordered by sorted
        file path, and one column per codebook entry.

    Raises:
        ValueError: if no descriptor files are found for the split.
    """
    folder_path = dataset_path + '/' + folder

    # Collect the descriptor files of every category, then order them by path.
    all_sift_files = []
    for category in get_categories(folder_path):
        all_sift_files.extend(get_sift_files(join(folder_path, category)))
    all_sift_files.sort()
    # NOTE(review): the row order (sorted file paths) has to match the order
    # in which the labels for this split are loaded elsewhere -- confirm.

    if not all_sift_files:
        # Fail with an explicit message instead of the opaque error np.vstack
        # raises for an empty list.
        raise ValueError("no SIFT descriptor files found under " + folder_path)

    # Element [1] of the reader's result is what gets quantized below.
    descriptors_per_file = [sift.read_features_from_file(sift_file)[1]
                            for sift_file in all_sift_files]

    print("## compute the visual words histograms for each image")
    sift_feature_rows = [computeHistograms(codebook, descriptors)
                         for descriptors in descriptors_per_file]

    sift_feature_matrix = np.vstack(sift_feature_rows)
    print(sift_feature_matrix.shape)
    return sift_feature_matrix


# Bag-of-visual-words matrices for both splits; 'train' is processed first,
# then 'test'.
X_train_sift_features, X_test_sift_features = map(sift_features, ('train', 'test'))

# Report the resulting matrix shapes.
print("train_sift_features: {0}".format(X_train_sift_features.shape))
print("test_sift_features: {0}".format(X_test_sift_features.shape))

## loading codebook from codebook_dsift_10_5_300.file
## compute the visual words histograms for each image
(1020, 300)
## compute the visual words histograms for each image
(340, 300)
train_sift_features: (1020, 300)
test_sift_features: (340, 300)


## Randomly Shuffle the Rows in the Train Feature Matrix

In [None]:
# Randomly shuffle the input images and labels (IN THE SAME RANDOM ORDER SO THEY ARE STILL CORRELATED)
#rng_state = np.random.get_state()
#np.random.shuffle(X_train_sift_features)
#np.random.set_state(rng_state)
#np.random.shuffle(y_train)

In [30]:
from sklearn.svm import SVC

# Linear SVM trained on the visual-word histograms only. `fit` returns the
# estimator itself, so construction and training are chained.
svm = SVC(kernel='linear', class_weight='balanced').fit(X_train_sift_features, y_train)

# Accuracy = fraction of test examples whose predicted label equals the true one.
y_pred = svm.predict(X_test_sift_features)
print('accuracy: %f' % np.mean(y_pred == y_test))

accuracy: 0.288235


In [None]:
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score

# 10-fold cross-validation of the SIFT-only SVM on the training split.
num_examples = X_train_sift_features.shape[0]

# shuffle=True is required here: sift_features orders its rows by sorted file
# path, i.e. grouped by category, and the cell that would shuffle the training
# rows is commented out. Contiguous (unshuffled) folds would therefore hold out
# whole categories at once and give misleading scores. This also matches the
# KFold settings used for the combined-feature cross-validation.
cv = KFold(num_examples, n_folds=10, shuffle=True, random_state=None)

svm_cv_accuracies = cross_val_score(svm, X_train_sift_features, y_train, cv=cv, n_jobs=7)
print(svm_cv_accuracies)

## Try combining this with other features

In [31]:
def extract_gist(I):
    """Return ``gist.extract`` applied to image array `I` after casting it to uint8."""
    image_uint8 = I.astype('uint8')
    return gist.extract(image_uint8)

In [32]:
def tiny_image(image_array, width=16, height=16):
    """Downscale an image and return its pixels as a flat feature vector.

    Args:
        image_array: image as a numeric array; it is cast to uint8 before
            being handed to PIL.
        width: width of the downscaled image in pixels.
        height: height of the downscaled image in pixels.

    Returns:
        1-D float array containing every value of the resized image. For a
        3-channel image this has ``width * height * 3`` entries.
    """
    I = Image.fromarray(image_array.astype('uint8'))
    I_tiny = I.resize((width, height))
    I_tiny_array = np.array(I_tiny).astype('float')
    # Flatten whatever PIL returned instead of hard-coding three channels, so
    # single-channel or 4-channel inputs no longer make the reshape fail. The
    # result for 3-channel input is unchanged.
    return I_tiny_array.reshape(-1)

In [33]:
def extract_features(X, feature_functions):
    """Apply every feature function to every example and stack the results.

    Args:
        X: array whose first axis indexes the examples; ``X[i]`` is passed to
            each feature function.
        feature_functions: sequence of callables, each returning an array
            for one example.

    Returns:
        2-D array with one row per example; a row is the horizontal
        concatenation of the outputs of all feature functions, in the order
        the functions are given.
    """
    example_rows = [
        np.hstack([feature_func(X[position]) for feature_func in feature_functions])
        for position in range(X.shape[0])
    ]
    return np.vstack(example_rows)

In [34]:
from features import hog_feature, color_histogram_hsv

num_color_bins = 200 # Number of bins in the color histogram

# Per-image features: GIST, tiny image and HSV color histogram.
feature_fns = [extract_gist, tiny_image, lambda img: color_histogram_hsv(img, nbin=num_color_bins)]
X_train_features = extract_features(X_train, feature_fns)
X_test_features = extract_features(X_test, feature_fns)

# Append the visual-word histograms as additional columns.
X_train_features = np.hstack([X_train_features, X_train_sift_features])
X_test_features = np.hstack([X_test_features, X_test_sift_features])

# Preprocessing: Subtract the mean feature (statistics come from the training
# split only and are applied to both splits).
mean_features = np.mean(X_train_features, axis=0)
mean_features = np.expand_dims(mean_features, axis=0)
X_train_features -= mean_features
X_test_features -= mean_features

# Preprocessing: Divide by standard deviation. This ensures that each feature
# has roughly the same scale.
std_features = np.std(X_train_features, axis=0)
std_features = np.expand_dims(std_features, axis=0)
# A feature that is constant over the training split has zero standard
# deviation; dividing by it would produce NaN/inf. Use 1.0 for those columns
# so they are left at their centered value.
std_features[std_features == 0] = 1.0
X_train_features /= std_features
X_test_features /= std_features

# Preprocessing: Add a bias dimension
X_train_features = np.hstack([X_train_features, np.ones((X_train_features.shape[0], 1))])
X_test_features = np.hstack([X_test_features, np.ones((X_test_features.shape[0], 1))])

print(X_train_features.shape)

(1020, 2229)


In [35]:
from sklearn.svm import SVC

# Same linear SVM configuration, now trained on the combined feature matrix.
# `fit` returns the estimator itself, so construction and training are chained.
svm = SVC(kernel='linear', class_weight='balanced').fit(X_train_features, y_train)

# Fraction of test examples classified correctly.
y_pred = svm.predict(X_test_features)
print('accuracy: %f' % np.mean(y_pred == y_test))

accuracy: 0.729412


In [36]:
from sklearn.cross_validation import KFold, cross_val_score

# 10-fold cross-validation (shuffled folds) of the current `svm` on the
# combined training features.
num_examples = len(X_train_features)
cv = KFold(num_examples, n_folds=10, shuffle=True, random_state=None)

svm_cv_accuracies = cross_val_score(
    svm, X_train_features, y_train, cv=cv, n_jobs=7)
print(svm_cv_accuracies)

[ 0.67647059  0.65686275  0.68627451  0.66666667  0.66666667  0.61764706
  0.59803922  0.66666667  0.81372549  0.61764706]


In [37]:
from sklearn.svm import SVC

# Refit the linear SVM with probability estimates enabled so that
# predict_proba is available.
svm = SVC(kernel='linear', class_weight='balanced', probability=True).fit(X_train_features, y_train)

# Note: from here on `y_pred` holds one row of class probabilities per test
# example rather than predicted labels.
y_pred = svm.predict_proba(X_test_features)
print(y_pred[0])

[ 0.71965952  0.00167979  0.00604713  0.00854004  0.00562125  0.00186928
  0.01598482  0.00182104  0.04596984  0.05848008  0.03349536  0.03800551
  0.02531016  0.00236164  0.00414568  0.01001119  0.02099767]


In [38]:
# Rank the classes of every test example by predicted probability and report
# top-5 and top-1 accuracy. `y_pred` is expected to hold the predict_proba
# output of `svm` (one row per test example, one column per class).

# Column j of predict_proba belongs to svm.classes_[j]; translate column
# positions into real class labels instead of assuming the labels are 0..n-1.
class_labels = svm.classes_.tolist()

# (probability, column) pairs per example, sorted from most to least probable.
# Distinct loop variable names are used so the inner loop no longer rebinds
# the outer loop's variable.
y_augmented = [[(prob, col) for (col, prob) in enumerate(row)] for row in y_pred]
y_sorted = [sorted(pairs, reverse=True) for pairs in y_augmented]
y_top5 = [pairs[:5] for pairs in y_sorted]
y_top5_labels = [[class_labels[col] for (prob, col) in pairs] for pairs in y_top5]
print(y_top5_labels[0])

# Top-5 accuracy: the true label is among the five most probable classes.
num_within_top_5 = 0.0
for true_label, top5_labels in zip(y_test, y_top5_labels):
    if true_label in top5_labels:
        num_within_top_5 += 1.0

percentage_within_top_5 = num_within_top_5 / len(y_test)
print("Top-5 accuracy: {0}".format(percentage_within_top_5))

# Top-1 accuracy: the most probable class equals the true label. The list is
# converted to an array so the comparison is element-wise whatever the type
# of y_test.
y_top1_labels = [labels[0] for labels in y_top5_labels]
print("Top-1 accuracy: {0}".format((np.mean(np.asarray(y_top1_labels) == y_test))))

[0, 9, 8, 11, 10]
Top-5 accuracy: 0.947058823529
Top-1 accuracy: 0.747058823529


In [None]:
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC

# Exhaustive search over the regularization strength C and kernel width gamma.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}

# `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels. Searching it
# with the linear-kernel `svm` would refit the same model once per gamma value
# and report an arbitrary "best" gamma, so the search uses its own RBF
# estimator. Probability estimates are left off because they are not needed
# for scoring and make every fit slower.
grid_svm = SVC(kernel='rbf', class_weight='balanced')

# `cv` is the KFold splitter defined for the cross-validation of the combined
# features.
clf = GridSearchCV(grid_svm, param_grid, cv=cv, n_jobs=7)

clf = clf.fit(X_train_features, y_train)

print("Best estimator found by grid search:")
print(clf.best_params_)
print("Best parameters validation score: {:.3f}".format(clf.best_score_))