In [1]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import cyvlfeat as vlfeat
from sklearn.svm import LinearSVC, SVC
import os.path as osp
from skimage import filters
from skimage.feature import corner_peaks
from skimage.io import imread
import pickle
from random import shuffle
from scipy.spatial.distance import cdist
from xgboost import XGBClassifier

In [3]:
def bags_of_sifts(image_paths=[], vocab_filename='', img=None):

    with open(vocab_filename, 'rb') as f:
        vocab = pickle.load(f)

    vocab_size = vocab.shape[0]
#     print(vocab_size)
    feats = []
    
    if len(image_paths) == 0:
        image_paths = ['']
    
    for path in image_paths:
        image = np.asarray(plt.imread(path)) if path != '' else img
        img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        (frames, descriptors) = vlfeat.sift.dsift(img_gray, fast=True, step=10)
        
        samples = descriptors
        
        D = cdist(samples, vocab)
        
        closest_words = np.argmin(D, axis=1)

#         for i in range(D.shape[0]):
#             min_index = np.argmin(D[i])
#             histogram[min_index]+=1

        histogram, bin_edges = np.histogram(closest_words, bins=np.arange(0, vocab_size+1))
#         print(histogram)

        if np.linalg.norm(histogram) == 0.0:
            print(descriptors.shape)
            print(D.shape)
            print(np.linalg.norm(histogram))
            print(img_gray.shape)
            print("--")
            
            
        histogram = histogram / np.linalg.norm(histogram)
        feats.append(histogram)
        
    N = len(image_paths)
    d = vocab_size
    feats = np.asarray(feats)
    feats = feats.reshape((N,d))
    return feats


def build_vocabulary(image_paths, vocab_size):
    dim = 128     
    vocab = np.zeros((vocab_size,dim))
    sift_features = []

    for path in image_paths:
        image = np.asarray(plt.imread(path))
        img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        (frames, descriptors) = vlfeat.sift.dsift(img_gray, fast=True, step=5)

        samples = np.random.permutation(descriptors)[:20]

        for descriptor in samples:
            sift_features.append(descriptor)

    sift_features = np.asarray(sift_features).astype('float64').reshape((-1,128))
    vocab = vlfeat.kmeans.kmeans(sift_features, vocab_size)

    return vocab

clf = LinearSVC(C=2)

def svm_classify(train_image_feats, train_labels, test_image_feats):
    categories = list(set(train_labels))
    test_labels = []
    
    clf.fit(train_image_feats, train_labels)
    test_labels = clf.predict(test_image_feats)

    return test_labels


def test_accuracy(test_labels, predicted_labels):
    num_correct = 0
    for i,label in enumerate(test_labels):
        if (predicted_labels[i] == label):
            num_correct += 1
    return num_correct/len(test_labels)

In [4]:
waldo_paths = []
wenda_paths = []
wizard_paths = []
negative_paths = []
all_paths = []

test_image_paths = []
with open('datasets/ImageSets/val.txt') as file:
    for img_id in file.readlines():
        img_id = img_id.rstrip()
        test_image_paths.append('datasets/JPEGImages/{}.jpg'.format(img_id))

file.close()


template_dirs = ["templates/waldo","templates/wenda","templates/wizard"]

for i in range(len(template_dirs)):
    for img_id in os.listdir(template_dirs[i]):
        path_to_dir = os.path.join(template_dirs[i], '{}'.format(img_id)).rstrip()
        if not os.path.isdir(path_to_dir):
            continue
        list_of_files = os.listdir(path_to_dir)
        for file_name in list_of_files:
            all_paths.append(os.path.join(path_to_dir, '{}'.format(file_name)).rstrip())
            if i==0:
                waldo_paths.append(os.path.join(path_to_dir, '{}'.format(file_name)).rstrip())
            if i==1:
                wenda_paths.append(os.path.join(path_to_dir, '{}'.format(file_name)).rstrip())
            if i==2:
                wizard_paths.append(os.path.join(path_to_dir, '{}'.format(file_name)).rstrip())

negative_dir = "negatives_same_scale"

for file_name in os.listdir(negative_dir):
    path = os.path.join(negative_dir, '{}'.format(file_name)).rstrip()
    negative_paths.append(path)
    all_paths.append(path)

# print(sorted(all_paths))
print(len(waldo_paths))
print(len(wenda_paths))
print(len(wizard_paths))
print(len(negative_paths))

137
43
27
184


In [5]:
# get vocab
print('Using the BAG-OF-SIFT representation for images')

vocab_filename = 'vocab.pkl'

# print('No existing visual word vocabulary found. Computing one from training images')
vocab_size = 200  # Larger values will work better (to a point) but be slower to compute
vocab = build_vocabulary(all_paths,vocab_size)
print(np.isnan(vocab).any())
    
with open(vocab_filename, 'wb') as f:
    pickle.dump(vocab, f)

    print('{:s} saved'.format(vocab_filename))

Using the BAG-OF-SIFT representation for images
False
vocab.pkl saved


In [6]:
#get bags of sifts
template_percentage = 0.8

print(f"all_feats = {len(all_paths)}")

# waldo_feats = bags_of_sifts(waldo_paths,vocab_filename)
# wenda_feats = bags_of_sifts(wenda_paths,vocab_filename)
# wizard_feats = bags_of_sifts(wizard_paths,vocab_filename)

# print(np.isnan(waldo_feats).any())
# print(np.isnan(wenda_feats).any())
# print(np.isnan(wizard_feats).any())

# print(waldo_feats)
# print(wenda_feats)
# print(wizard_feats)


waldo_feats = bags_of_sifts(waldo_paths[:int(len(waldo_paths)*template_percentage)],vocab_filename)
wenda_feats = bags_of_sifts(wenda_paths[:int(len(wenda_paths)*template_percentage)],vocab_filename)
wizard_feats = bags_of_sifts(wizard_paths[:int(len(wizard_paths)*template_percentage)],vocab_filename)
negative_feats = bags_of_sifts(negative_paths[:int(len(negative_paths)*template_percentage)], vocab_filename)

training_feats = []
training_feats.extend(waldo_feats)
training_feats.extend(wenda_feats)
training_feats.extend(wizard_feats)
training_feats.extend(negative_feats)

# print(len(waldo_feats))
# print(len(wenda_feats))
# print(len(wizard_feats))
print("--\ntest_feats_lengths")

# test_image_feats 
waldo_test_feats = bags_of_sifts(waldo_paths[int(len(waldo_paths)*template_percentage):len(waldo_paths)],vocab_filename)
wenda_test_feats = bags_of_sifts(wenda_paths[int(len(wenda_paths)*template_percentage):len(wenda_paths)],vocab_filename)
wizard_test_feats = bags_of_sifts(wizard_paths[int(len(wizard_paths)*template_percentage):len(wizard_paths)],vocab_filename)
negative_test_feats = bags_of_sifts(negative_paths[int(len(negative_paths)*template_percentage):len(negative_paths)],vocab_filename)

test_feats = []
test_feats.extend(waldo_test_feats)
test_feats.extend(wenda_test_feats)
test_feats.extend(wizard_test_feats)
test_feats.extend(negative_test_feats)

#set training labels
train_labels = []
train_labels.extend([0]*len(waldo_feats))
train_labels.extend([1]*len(wenda_feats))
train_labels.extend([2]*len(wizard_feats))
train_labels.extend([3]*len(negative_feats))

# print(len(waldo_test_feats))
# print(len(wenda_test_feats))
# print(len(wizard_test_feats))
print("--\ntest_labels lengths")

ground_truth_test_labels = []
ground_truth_test_labels.extend([0]*len(waldo_test_feats))
ground_truth_test_labels.extend([1]*len(wenda_test_feats))
ground_truth_test_labels.extend([2]*len(wizard_test_feats))
ground_truth_test_labels.extend([3]*len(negative_test_feats))

print(len(ground_truth_test_labels))

print(len(train_labels))
print(len(training_feats))
print("--")

print("done")

all_feats = 391
--
test_feats_lengths
--
test_labels lengths
80
311
311
--
done


In [18]:
# predicted_labels = svm_classify(training_feats, train_labels, test_feats)

In [26]:
model = XGBClassifier(learning_rate=0.01, 
                      objective='multi:softprob', 
                      n_estimators=2000, 
                      max_depth=3, 
                      subsample=0.8, 
                      colsample_bytree=1, 
                      num_class=4)
model.fit(np.asarray(training_feats), np.asarray(train_labels))
predicted = model.predict_proba(np.asarray(test_feats))
print(predicted)
# print(test_accuracy(ground_truth_test_labels, predicted))

[[  8.77996743e-01   2.19124160e-03   3.01638991e-03   1.16795659e-01]
 [  8.10314178e-01   1.29717410e-01   8.86325352e-03   5.11052050e-02]
 [  1.14402354e-01   8.76607239e-01   9.96555435e-04   7.99381733e-03]
 [  8.49645078e-01   2.69335206e-03   3.38097615e-03   1.44280568e-01]
 [  5.86937487e-01   2.90454496e-02   6.20405599e-02   3.21976483e-01]
 [  9.38659787e-01   5.53756719e-03   6.70831744e-03   4.90943044e-02]
 [  9.60931838e-01   2.15911982e-03   3.51157319e-03   3.33975069e-02]
 [  8.85311663e-01   6.58615604e-02   6.29082881e-03   4.25359681e-02]
 [  7.13811755e-01   5.26387766e-02   6.13070279e-03   2.27418751e-01]
 [  8.27072561e-01   1.32443324e-01   3.14412685e-03   3.73399928e-02]
 [  9.01323318e-01   3.46054486e-03   6.28790446e-03   8.89282152e-02]
 [  3.22241843e-01   5.99874198e-01   2.61698794e-02   5.17140999e-02]
 [  8.55000377e-01   2.16712127e-03   8.52029712e-04   1.41980484e-01]
 [  9.05604899e-01   3.46444221e-03   9.15406831e-03   8.17765743e-02]
 [  3.

In [17]:
# print(test_accuracy(ground_truth_test_labels, predicted_labels))

# print(predicted_labels)

0.725


In [46]:
def svm_probability(train_image_feats, train_labels, test_image_feats):
    categories = list(set(train_labels))
    test_labels = []
    
    clf = SVC(C=2, gamma='scale',probability=True)
    clf.fit(train_image_feats, train_labels)
    test_probabilities = clf.predict_proba(test_image_feats)

    return test_probabilities

def sliding_window(window_x=200, window_y=600, step_size=1):
    f = open('datasets/ImageSets/val.txt')
    wa = open('my_waldo.txt', 'w+')
    we = open('my_wenda.txt', 'w+')
    wi = open('my_wizard.txt', 'w+')
    
    image_id = f.readline().rstrip()
    image_id = '002'
    while image_id:
        image = np.asarray(plt.imread('datasets/JPEGImages/' + image_id + '.jpg'))
        height, width, c = image.shape
        
        test_feats = []
        
#         print((height-window_size) * (width-window_size))
#         print(f"{height},{width}")
        
        #get the keypoints of the image
        #loop through these keypoints only => saves computation time
        mser = cv2.MSER_create()
#         img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        reg = mser.detectRegions(image)
        print(len(reg))
        
        ct=0
        for idx in range(len(kp)):
            j,i = kp[idx].pt

            i = int(np.round(i))
            j = int(np.round(j))
#             print(f"{ct}. {i} {j}")
            
            i_limit = i+window_y
            j_limit = j+window_x
            if i_limit >= height:
                i_limit = height-1
            if j_limit >= width:
                j_limit = width-1
            img = image[i:i_limit,j:j_limit]
#             print(f"{i}:{i_limit} {j}:{j_limit}")
            feats = bags_of_sifts(vocab_filename=vocab_filename, img=img)
            test_feats.extend(feats)
            ct += 1

#         print(test_feats)
#         print("--")
        
        #hilda's code: sliding window along entire image
#         test_feats=[]
#         for i in range(0, height-window_size, step_size):
#             for j in range(0, width-window_size, step_size):
#                 img = image[i:i+window_size, j:j+window_size]
#                 feats = bags_of_sifts(vocab_filename=vocab_filename, img=img)
#                 test_feats.extend(feats)
#                 if j==1: 
#                     break
#             break

#         print(len(test_feats))
        predicted_probabilities = model.predict_proba(np.asarray(test_feats))
#         print(predicted_probabilities)
        locations = np.argmax(predicted_probabilities, axis=0)
#         print(locations)
        # hilda's code
        conf = np.max(predicted_probabilities, axis=0)
#         print(conf)

        pl = model.predict(np.asarray(test_feats))
        locations = np.where(pl == 0)[0]
        
        for k in range(len(locations)):
            #hilda's code
#             i = locations[k] // (height-window_size)
#             j = locations[k] % (width-window_size)
            j_new, i_new  = kp[locations[k]].pt  # x location of best-fit window of character k
            
            i_new = int(np.round(i_new))
            j_new = int(np.round(j_new))
#             print(f"{locations[k]}. {i_new} {j_new}")
            i_limit_new = i_new+window_y
            j_limit_new = j_new+window_x
            
            if i_limit_new >= height:
                i_limit_new = height-1
            if j_limit_new >= width:
                j_limit_new = width-1
            
            patch = image[i_new:i_limit_new, j_new:j_limit_new]
            plt.imshow(patch, interpolation='nearest')
            plt.show()
            
#             res = image_id + ' ' + str(np.max(predicted_probabilities[locations[k]][k])) + ' ' + str(j_new) + ' ' + str(i_new) + ' ' + str(j_limit_new) + ' ' + str(i_limit_new) + '\n'
#             print(res)
#             if k == 0:
#                 wa.write(res)
#             if k == 1:
#                 we.write(res)
#             if k == 2:
#                 wi.write(res)
        break
        image_id = f.readline().rstrip()


sliding_window(200, 600)

2


NameError: name 'kp' is not defined