In [1]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import random
import pylab as pl
from scipy import ndimage
from sklearn.cluster import KMeans
from scipy.spatial import distance

In [4]:
#Bag of visual words model

def load_images_from_folder(folder):
    """Load every readable image below ``folder`` as grayscale, grouped by class.

    ``folder`` is expected to contain one sub-directory per category; the
    sub-directory name becomes the dictionary key.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        dict mapping each category (sub-directory name) to the list of
        grayscale images that ``cv2.imread`` could decode. A category
        without any readable image maps to an empty list.
    """
    images = {}
    # Sorted listings keep the load order (and everything derived from it)
    # independent of the file system's enumeration order.
    for category_name in sorted(os.listdir(folder)):
        category_path = os.path.join(folder, category_name)
        # A stray file next to the class folders would make the inner
        # os.listdir() raise NotADirectoryError, so only descend into folders.
        if not os.path.isdir(category_path):
            continue
        category = []
        for image_name in sorted(os.listdir(category_path)):
            # Flag 0 makes imread return a single-channel grayscale image.
            img = cv2.imread(os.path.join(category_path, image_name), 0)
            # imread returns None for anything it cannot decode.
            if img is not None:
                category.append(img)
        images[category_name] = category
    return images
# Load the training images, grouped by category (one sub-folder per class)
images = load_images_from_folder('./dataset')  
# Load the query (test) images, grouped the same way
test = load_images_from_folder("./query")

In [5]:
# Extract SIFT descriptors for every image, class by class
def sift_features(images):
    """Compute the SIFT descriptors of every image.

    Args:
        images: dict mapping a class name to a list of grayscale images.

    Returns:
        ``[descriptor_list, sift_vectors]`` where ``descriptor_list`` is a flat
        list containing every descriptor of every image (the input for the
        clustering step) and ``sift_vectors`` maps each class name to a list
        with one descriptor array per image, in the order of the input images.
    """
    sift_vectors = {}
    descriptor_list = []
    sift = cv2.SIFT_create()
    for key, value in images.items():
        features = []
        for img in value:
            _, des = sift.detectAndCompute(img, None)
            if des is None:
                # No keypoint was detected, so OpenCV hands back None instead
                # of an array. Use an empty descriptor set so that extend()
                # does not fail and the image still gets its own entry.
                des = np.empty((0, sift.descriptorSize()), dtype=np.float32)
            descriptor_list.extend(des)
            features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

# Run SIFT on the training images; the result is [flat descriptor list, per-class descriptors]
sifts = sift_features(images) 
# Flat list of all training descriptors (input for the k-means vocabulary)
descriptor_list = sifts[0] 
# Training descriptors, separated class by class
all_bovw_feature = sifts[1] 
# Test descriptors, separated class by class (the flat list is not needed for test data)
test_bovw_feature = sift_features(test)[1]

In [6]:

# Clusters the pooled descriptors with k-means and returns the cluster
# centres, which are used as the visual words of the vocabulary.
def kmeans(k, descriptor_list, n_init=10, random_state=None):
    """Build the visual vocabulary by clustering descriptors.

    Args:
        k: Number of clusters, i.e. the number of visual words.
        descriptor_list: Sequence of descriptor vectors (one row per descriptor).
        n_init: Forwarded to ``KMeans``; defaults to the value this function
            has always used.
        random_state: Forwarded to ``KMeans``. Pass an integer to get the same
            vocabulary on every run; ``None`` keeps the previous unseeded
            behaviour.

    Returns:
        The fitted estimator's ``cluster_centers_``.
    """
    model = KMeans(n_clusters=k, n_init=n_init, random_state=random_state)
    model.fit(descriptor_list)
    return model.cluster_centers_
    
# Build the vocabulary: the 150 cluster centres are the visual words
visual_words = kmeans(150, descriptor_list) 

In [7]:
# Find the nearest visual word for a single SIFT descriptor and return its index
def find_index(image, center):
    """Return the index of the visual word closest to one descriptor.

    Args:
        image: A single descriptor vector (the name is historical; it is one
            feature of an image, not a whole image).
        center: Sequence of visual words (cluster centres), one per row.

    Returns:
        int index of the centre with the smallest Euclidean distance to
        ``image``. Ties resolve to the lowest index, and an empty ``center``
        yields 0.
    """
    centers = np.asarray(center, dtype=float)
    if centers.shape[0] == 0:
        # Nothing to compare against; the loop-based version returned 0 here.
        return 0
    # One vectorised pass instead of a Python-level distance call per centre.
    offsets = centers - np.asarray(image, dtype=float)
    distances = np.linalg.norm(offsets, axis=1)
    # argmin reports the first minimum, matching the strict "<" of the old loop.
    return int(np.argmin(distances))

In [8]:
# Builds the bag-of-visual-words histogram of every image, class by class.
def image_class(all_bovw, centers):
    """Turn per-image descriptor sets into visual-word histograms.

    Args:
        all_bovw: dict mapping a class name to a list with one descriptor
            collection per image. An entry may be ``None`` or empty when no
            descriptor was found for that image.
        centers: The visual words (k-means cluster centres).

    Returns:
        dict mapping each class name to a list of histograms, one per image.
        Each histogram has ``len(centers)`` bins and counts how many of the
        image's descriptors fall closest to each visual word.
    """
    dict_feature = {}
    n_words = len(centers)
    for key, value in all_bovw.items():
        category = []
        for img in value:
            histogram = np.zeros(n_words)
            # An image without keypoints may carry None instead of an array;
            # it contributes an all-zero histogram rather than crashing.
            descriptors = () if img is None else img
            for each_feature in descriptors:
                histogram[find_index(each_feature, centers)] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature
    
# Visual-word histograms of the training images (one per image, grouped by class)
bovw_train = image_class(all_bovw_feature, visual_words) 
# Visual-word histograms of the test images, built with the same vocabulary
bovw_test = image_class(test_bovw_feature, visual_words) 

In [9]:
# 1-NN classifier used to predict the class of every test image.
def knn(images, tests):
    """Classify each test vector with the label of its nearest training vector.

    Args:
        images: dict mapping a class name to the list of training feature vectors.
        tests: dict mapping the true class name to the list of test feature vectors.

    Returns:
        ``[num_test, correct_predict, class_based]`` where ``class_based`` maps
        each test class to ``[correctly predicted, total]``.
    """
    num_test = 0
    correct_predict = 0
    class_based = {}

    for test_key, test_val in tests.items():
        # [number of correctly predicted images, number of images] for this class
        class_based[test_key] = [0, 0]
        for tst in test_val:
            # None means "no training vector seen yet". The previous
            # placeholder label "a" could collide with a real class name.
            predicted = None
            minimum = None
            for train_key, train_val in images.items():
                for train in train_val:
                    dist = distance.euclidean(tst, train)
                    # Strict "<" keeps the first of several equally close neighbours.
                    if minimum is None or dist < minimum:
                        minimum = dist
                        predicted = train_key
            # Without any training vector there is no prediction, so it can
            # never count as correct.
            if minimum is not None and test_key == predicted:
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1
    return [num_test, correct_predict, class_based]
    
# Classify the test histograms against the training histograms with 1-NN
results_bowl = knn(bovw_train, bovw_test) 

In [10]:
# Prints the overall accuracy and the accuracy of every class.
def accuracy(results):
    """Print overall and per-class accuracy as percentages.

    Args:
        results: ``[num_test, correct_predict, class_based]`` as returned by
            ``knn``; ``class_based`` maps a class name to ``[correct, total]``.

    A ratio whose total is zero (no test images at all, or an empty class) is
    reported as ``nan`` instead of raising ``ZeroDivisionError``.
    """
    def _percentage(correct, total):
        # Share of correct predictions in percent; nan when nothing was tested.
        return (correct / total) * 100 if total else float("nan")

    avg_accuracy = _percentage(results[1], results[0])
    print("Average accuracy: " + str(avg_accuracy) + "%")
    print("\nClass based accuracies: \n")
    for key, value in results[2].items():
        acc = _percentage(value[0], value[1])
        print(key + " : " + str(acc) + "%")
        
# Compute the accuracies of the bag-of-visual-words + 1-NN pipeline and print them
accuracy(results_bowl) 

Average accuracy: 63.366336633663366%

Class based accuracies: 

Cat : 67.32673267326733%
Dog : 59.4059405940594%


In [11]:
# Baseline comparison: accuracy of a pretrained network on the same data.
# Transfer learning on a pretrained VGG16 model for cats and dogs classification:
# the convolutional base is frozen and only a new sigmoid output unit is trained.
# import VGG 16 from keras.applications
from tensorflow.keras.applications import VGG16
# instantiate the model using the imagenet weights and input shape of 224x224x3
vgg16 = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
# Make the VGG layers non-trainable
for layer in vgg16.layers:
    layer.trainable = False

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# input_ is the VGG16 input tensor
input_ = vgg16.input
# output_ is the result of applying the frozen VGG16 base to that input
output_ = vgg16(input_)
# Flatten the output
last_layer = Flatten(name='flatten')(output_)
# Add a dense layer with 1 neuron and sigmoid activation
last_layer = Dense(1, activation='sigmoid')(last_layer)
# Create the model with input_ as input and last_layer as output
model = Model(inputs = input_, outputs = last_layer)
# Define the training parameters
BATCH_SIZE = 32
# NOTE(review): 200 is hard-coded while the recorded output reports 402 training
# images, so one epoch covers only 6 batches (192 images) - confirm this is intended.
STEPS_PER_EPOCH = 200 // BATCH_SIZE
EPOCHS = 3
# Compile the model
model.compile(optimizer ='adam',loss = 'binary_crossentropy',metrics=['accuracy'])
# Create the training and test generators (pixel values rescaled to [0, 1])
training_data_generator = ImageDataGenerator(rescale = 1./255)
testing_data_generator = ImageDataGenerator(rescale = 1./255)
# Create the training and test data; labels come from the sub-folder names
training_set = training_data_generator.flow_from_directory('./dataset', target_size=(224,224),
                                                           batch_size = BATCH_SIZE, class_mode = 'binary')
test_set = testing_data_generator.flow_from_directory('./query',
                                             target_size = (224, 224),
                                             batch_size = BATCH_SIZE,
                                             class_mode = 'binary')
# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated (see the warning in the recorded output) and gone in newer Keras.
model.fit(training_set, steps_per_epoch = STEPS_PER_EPOCH, epochs = EPOCHS, verbose =1)

Found 402 images belonging to 2 classes.
Found 202 images belonging to 2 classes.


  model.fit_generator(training_set, steps_per_epoch = STEPS_PER_EPOCH, epochs = EPOCHS, verbose =1)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x14706ba00a0>

In [12]:
# Evaluate the model on every batch of the test set.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
# verbose=0 keeps the silent behaviour evaluate_generator had by default.
score = model.evaluate(test_set, steps=len(test_set), verbose=0)
# Print each reported metric (loss first, then the compiled metrics) with its name
for idx, metric in enumerate(model.metrics_names):
    print("{}: {}".format(metric,score[idx]))

  score = model.evaluate_generator(test_set,len(test_set))


loss: 0.5274196863174438
accuracy: 0.698019802570343
