In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The dataset paths used further down ('drive/My Drive/HW3_data/...') are
# relative; presumably they resolve because the working directory is /content
# -- confirm when running outside the default Colab setup.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import csv
import cv2
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial import distance


In [0]:
def read_sift_descriptors(path, name, number_of_images):
	"""Load the SIFT descriptors of images 1..number_of_images from CSV files.

	One file per image is read, named ``<path>/<i>_<name>_sift.csv``. Every
	non-blank row contributes one descriptor: the first four columns are
	skipped (presumably keypoint metadata -- confirm against the data format)
	and the remaining columns are parsed as integers.

	Args:
		path: Directory that contains the per-image CSV files.
		name: Middle part of the file name (e.g. 'train' or 'test').
		number_of_images: Number of files to read; indices start at 1.

	Returns:
		A tuple ``(sift_features_of_all_images, number_of_features_in_each_image)``:
		a flat list with the descriptors of all images in file order, and a list
		holding how many descriptors each image contributed.

	Raises:
		OSError: If one of the expected files cannot be opened.
		ValueError: If a descriptor column is not an integer.
	"""
	sift_features_of_all_images = []
	number_of_features_in_each_image = []
	for i in range(1, number_of_images + 1):
		train_file = path + '/' + str(i) + '_' + name + '_sift.csv'
		# The context manager closes the handle even when a row fails to parse;
		# newline='' is the mode the csv module asks for on reader input.
		with open(train_file, 'rt', newline='') as csvfile:
			# Blank lines come back from csv.reader as []; they carry no descriptor,
			# so they must not be counted as a feature.
			descriptors = [list(map(int, row[4:])) for row in csv.reader(csvfile) if row]
		sift_features_of_all_images.extend(descriptors)
		number_of_features_in_each_image.append(len(descriptors))
	return sift_features_of_all_images, number_of_features_in_each_image

In [0]:
def compute_visual_words_k_means(train_sift_features, test_sift_features, no_k_clusters, random_state=None):
	"""Cluster the training descriptors with K-means to obtain the visual words.

	Args:
		train_sift_features: Descriptors the clustering is fitted on.
		test_sift_features: Not used by this function; kept so existing calls
			keep working.
		no_k_clusters: Number of clusters (visual words) to compute.
		random_state: Optional seed forwarded to ``KMeans``. The default ``None``
			keeps the previous, unseeded behaviour; pass an integer to make the
			centroids reproducible between runs.

	Returns:
		A tuple ``(centroids, labels)``: the fitted ``cluster_centers_`` and the
		``labels_`` assigned to the training descriptors.
	"""
	k_means = KMeans(n_clusters=no_k_clusters, random_state=random_state)
	k_means.fit(train_sift_features)
	return k_means.cluster_centers_, k_means.labels_

In [0]:
def find_similarity(image_sift_feature, cluster_centroids):
	"""Return the index of the centroid nearest to a descriptor.

	Nearness is the Euclidean distance between ``image_sift_feature`` and each
	entry of ``cluster_centroids``. When several centroids are equally close,
	the one that appears first wins.
	"""
	gaps = [distance.euclidean(centroid, image_sift_feature) for centroid in cluster_centroids]
	# min() keeps the first of several equal keys, so ties resolve to the lowest index.
	return min(range(len(gaps)), key=gaps.__getitem__)

In [0]:
def visual_words_representation_of_images(All_sift_features, number_of_features_in_each_image, cluster_centroids, no_k_clusters):
	"""Build one visual-word histogram per image.

	The descriptors in ``All_sift_features`` are consumed in order;
	``number_of_features_in_each_image`` says how many of them belong to each
	image. Every descriptor votes for its nearest centroid (via
	``find_similarity``), and the votes of one image form its histogram. The
	histograms hold raw counts; no normalisation is applied.

	Args:
		All_sift_features: Iterable with the descriptors of all images, in image order.
		number_of_features_in_each_image: Descriptor count of each image.
		cluster_centroids: Centroids that define the visual words.
		no_k_clusters: Histogram length, i.e. the number of visual words.

	Returns:
		A list with one histogram (list of ``no_k_clusters`` ints) per entry of
		``number_of_features_in_each_image``. An image without descriptors yields
		an all-zero histogram.

	Raises:
		ValueError: If the number of descriptors does not match the sum of the
			per-image counts.
	"""
	exhausted = object()  # sentinel: descriptors may legitimately be falsy values
	remaining = iter(All_sift_features)
	images = []
	# Drive the loop by image rather than by descriptor, so that an image with
	# zero descriptors still gets its (all-zero) histogram and later images stay
	# aligned with their labels.
	for image_number, expected in enumerate(number_of_features_in_each_image, start=1):
		image_feature = [0]*no_k_clusters
		for _ in range(expected):
			image_sift_feature = next(remaining, exhausted)
			if image_sift_feature is exhausted:
				raise ValueError(
					'ran out of SIFT descriptors while building image %d' % image_number
				)
			index = find_similarity(image_sift_feature, cluster_centroids)
			image_feature[index] += 1
		images.append(image_feature)
	if next(remaining, exhausted) is not exhausted:
		raise ValueError('more SIFT descriptors were supplied than the per-image counts account for')
	return images

In [0]:
def read_labels(path):
	"""Read the class labels stored in the first row of a CSV file.

	Args:
		path: Location of the comma-separated labels file.

	Returns:
		The entries of the first row converted to ``int``. Any further rows
		are ignored.

	Raises:
		IndexError: If the file contains no rows.
		ValueError: If an entry of the first row is not an integer.
	"""
	# Mode 'rU' was removed in Python 3.11 (open() raises ValueError there);
	# plain text mode with newline='' is what the csv module expects.
	with open(path, 'r', newline='') as csvfile:
		rows = list(csv.reader(csvfile, delimiter=','))
	return list(map(int, rows[0]))


In [0]:
def kNN_classifer(train_images, train_labels, k_NN):
	"""Create a k-nearest-neighbours classifier and fit it on the training set.

	``k_NN`` is the number of neighbours consulted per prediction. The fitted
	``KNeighborsClassifier`` is returned.
	"""
	classifier = KNeighborsClassifier(n_neighbors=k_NN)
	# fit() hands back the estimator itself, so the fitted model can be returned directly.
	return classifier.fit(train_images, train_labels)


In [0]:
def display_confusion_matrix(test_labels, test_prediction):
    """Print the confusion matrix of the true labels against the predictions."""
    matrix = confusion_matrix(test_labels, test_prediction)
    print(matrix)


In [0]:
def display_categorization_accuracy(test_labels, test_prediction):
	"""Print the accuracy score of the predictions against the true labels."""
	print(accuracy_score(test_labels, test_prediction))


In [12]:
# === Main pipeline: bag-of-visual-words image categorization === #

# Load the SIFT descriptors of the 1888 training images (one CSV per image).
train_sift_features, number_of_features_in_each_train_image = read_sift_descriptors('drive/My Drive/HW3_data/train_sift_features', 'train', 1888)
print(len(number_of_features_in_each_train_image))

# Load the SIFT descriptors of the 800 test images.
test_sift_features, number_of_features_in_each_test_image = read_sift_descriptors('drive/My Drive/HW3_data/test_sift_features', 'test', 800)
print(len(number_of_features_in_each_test_image))

# Cluster the training descriptors with K-means; each centroid is one visual word.
no_k_clusters = 16
print(no_k_clusters)
# Centroids of the clusters and the cluster label of every training descriptor.
cluster_centroids, labels = compute_visual_words_k_means(train_sift_features, test_sift_features, no_k_clusters)
print(cluster_centroids)

# Training
# Represent each image by its histogram of visual-word counts.
# NOTE: the counts are raw -- the helper does not normalize them.
train_images = visual_words_representation_of_images(train_sift_features, number_of_features_in_each_train_image, cluster_centroids, no_k_clusters)
train_labels_path = 'drive/My Drive/HW3_data/train_labels.csv'
train_labels = read_labels(train_labels_path)

# Fit a kNN classifier on the training histograms.
k_NN = 5
kNN_model = kNN_classifer(train_images, train_labels, k_NN)

# Testing
# Test images are encoded with the centroids learned from the training set.
test_images = visual_words_representation_of_images(test_sift_features, number_of_features_in_each_test_image, cluster_centroids, no_k_clusters)
test_prediction = kNN_model.predict(test_images)
test_labels_path = 'drive/My Drive/HW3_data/test_labels.csv'
test_labels = read_labels(test_labels_path)

# Confusion matrix of true test labels vs. predictions
display_confusion_matrix(test_labels, test_prediction)

# Categorization accuracy on the test set
display_categorization_accuracy(test_labels, test_prediction)


1888
800
16
[[85.71761853 18.34502664  1.61009018 ...  3.43346086  3.30615521
  21.71406613]
 [27.30801432 11.15926376  5.78136384 ... 12.03428772 10.44749792
  17.0606506 ]
 [29.73143976 17.41290537  9.79711162 ... 24.0417744  28.66510305
  27.16495531]
 ...
 [29.34153989 13.77764874  7.53989084 ... 40.94479929  8.52370998
   8.95823172]
 [23.20251151 14.5253244  12.88078694 ... 16.34830473 14.33357053
  12.20778568]
 [19.20100975 13.23168524  7.79052925 ...  6.39171309  9.55539694
  42.99899025]]


  


[[49  0 21  2 11 11  1  5]
 [ 0 84  0  0  3  0  8  5]
 [31  0 33  4  5 13 11  3]
 [ 6 11  6 37  3 10 17 10]
 [14  3  3  7 35 22 11  5]
 [23  5  7  2 12 32 10  9]
 [ 5  3  2 14 13  9 47  7]
 [13  4  3 14  9 14  6 37]]
0.4425


  
