In [None]:
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input
import cv2
import math
import numpy as np
import csv
import nmslib
from os import path

# Module-level defaults.
# NOTE(review): `feature_dimenstion` (sic, presumably "dimension") is not referenced
# anywhere in the visible code; confirm it is unused elsewhere before renaming or removing.
feature_dimenstion = 512 
# Not referenced in the visible code; 224 equals the default `window_size` of the
# window helpers defined below.
default_tile_size = 224
# Number of neighbours from *other* images examined per query descriptor in main().
default_neighbor_count = 3

def prepare_image_paths_labels(directory_path="household_images/images",
                               labels_file="household_images/labels.csv"):
    """Read the labels CSV and return parallel lists of image paths and labels.

    The first line of the CSV is treated as a header and skipped. In every
    remaining row the first column is an image file name (joined onto
    ``directory_path``) and the second column is parsed as an integer label.
    Completely blank rows are ignored.

    Args:
        directory_path: Directory that the file names in the CSV are relative to.
        labels_file: Path of the CSV file to read.

    Returns:
        A tuple ``(image_paths, image_labels)`` of equal-length lists.

    Raises:
        OSError: If ``labels_file`` cannot be opened.
        IndexError: If a non-blank row has fewer than two columns.
        ValueError: If a label is not an integer literal.
    """
    # newline="" is what the csv module asks for so quoted fields containing
    # line breaks are parsed correctly.
    with open(labels_file, "rt", encoding="utf8", newline="") as f_input:
        csv_input = csv.reader(f_input)
        next(csv_input, None)  # skip the header; the default avoids StopIteration on an empty file
        rows = [row for row in csv_input if row]  # csv yields [] for blank lines

    image_paths = [path.join(directory_path, row[0]) for row in rows]
    image_labels = [int(row[1]) for row in rows]

    return image_paths, image_labels

def open_and_prepare_image(image_path, window_size = 224, stride=32, steps=(15,15)):
    """Load an image and scale + centre-crop it to exactly fit a grid of windows.

    The target size is the area covered by ``steps[0]`` x ``steps[1]`` windows
    of side ``window_size`` placed ``stride`` pixels apart:
    ``window_size + (steps - 1) * stride`` along each axis. The image is scaled
    (bicubic) so that it covers the target while keeping its aspect ratio, and
    the overhang along the other axis is cropped equally from both sides.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        window_size: Side length of one square window, in pixels.
        stride: Offset in pixels between neighbouring windows.
        steps: ``(rows, columns)`` number of window positions.

    Returns:
        The prepared image as an array of shape
        ``(target_height, target_width, channels)``.

    Raises:
        OSError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with a message that names the file.
        raise OSError("could not read image: {!r}".format(image_path))

    target_height = window_size + (steps[0] - 1) * stride
    target_width = window_size + (steps[1] - 1) * stride

    image_height, image_width = image.shape[0], image.shape[1]

    # Aspect ratios are compared and scaled sizes computed with integer
    # arithmetic. floor(float_scale * dim) can land one pixel *below* the
    # target (e.g. 1/49*49 == 0.9999999999999999), which made the crop
    # offset negative and returned a wrongly sized image.
    if image_width * target_height < target_width * image_height:
        # Image is taller/narrower than the target: match widths, crop height.
        scaled_height = (target_width * image_height) // image_width
        # cv2.resize takes the destination size as (width, height).
        image = cv2.resize(image, (target_width, scaled_height), interpolation=cv2.INTER_CUBIC)
        top = (scaled_height - target_height) // 2
        return image[top:top + target_height, :, :]

    # Image is shorter/wider than (or the same shape as) the target:
    # match heights, crop width.
    scaled_width = (target_height * image_width) // image_height
    image = cv2.resize(image, (scaled_width, target_height), interpolation=cv2.INTER_CUBIC)
    left = (scaled_width - target_width) // 2
    return image[:, left:left + target_width, :]

def extract_windows(image_path, window_size = 224, stride=32, steps=(15,15)):
    """Cut a prepared image into a row-major grid of overlapping square windows.

    The image is first loaded and fitted with ``open_and_prepare_image`` using
    the same geometry, then ``steps[0] * steps[1]`` windows of side
    ``window_size`` are copied out, ``stride`` pixels apart.

    Args:
        image_path: Path of the image file.
        window_size: Side length of each window, in pixels.
        stride: Pixel offset between neighbouring windows.
        steps: ``(rows, columns)`` number of window positions.

    Returns:
        ``(windows, coords)`` where ``windows`` has shape
        ``(rows * columns, window_size, window_size, channels)`` and
        ``coords[n]`` holds the ``(row, column)`` grid position of window ``n``.
        Both are arrays created by ``np.zeros`` with its default dtype.
    """
    image = open_and_prepare_image(image_path, window_size, stride, steps)

    print("image.shape", image.shape)

    n_rows, n_cols = steps[0], steps[1]
    total = n_rows * n_cols

    windows = np.zeros((total, window_size, window_size, image.shape[2]))
    coords = np.zeros((total, 2))

    # np.ndindex walks the grid in row-major order, so the running counter
    # equals row * n_cols + col.
    for flat_idx, (row, col) in enumerate(np.ndindex(n_rows, n_cols)):
        top = stride * row
        left = stride * col
        windows[flat_idx] = image[top:(top + window_size), left:(left + window_size)]
        coords[flat_idx] = (row, col)

    return windows, coords


def mean_local_feats(local_feats):
    """Return the element-wise mean of a non-empty sequence of 1-D feature vectors.

    The vectors are added one after another onto a zero vector created by
    ``np.zeros`` (default dtype) whose length is taken from the first vector,
    and the total is divided by the number of vectors.

    Args:
        local_feats: Non-empty sequence of equal-length 1-D arrays.

    Returns:
        1-D array with the averaged features.
    """
    zero_start = np.zeros((local_feats[0].shape[0],))
    # Built-in sum() adds left to right starting from `zero_start`, i.e. the
    # same accumulation order as an explicit loop.
    total = sum(local_feats, zero_start)
    return total / len(local_feats)


def calc_image_descriptors(image_path, model, window_size_blocks = 7, block_size=32, block_grid=(21,28)):
    """Compute one averaged feature descriptor per block of an image.

    The image is tiled into overlapping windows of ``window_size_blocks`` x
    ``window_size_blocks`` blocks (stride: one block) via ``extract_windows``.
    The windows are run through ``preprocess_input`` and ``model.predict``.
    Each spatial position ``(j, k)`` of a window's output is attributed to the
    grid block at ``(window_row + j, window_col + k)``; every block's
    descriptor is the mean (``mean_local_feats``) of all vectors attributed
    to it.

    Args:
        image_path: Path of the image file.
        model: Object whose ``predict`` returns a 4-D array indexed as
            ``[window, row, column, feature]``.
        window_size_blocks: Window side length measured in blocks.
        block_size: Block side length in pixels (also the window stride).
        block_grid: ``(rows, columns)`` of blocks covering the prepared image.

    Returns:
        List of ``block_grid[0] * block_grid[1]`` descriptors in row-major
        block order.
    """
    grid_rows, grid_cols = block_grid[0], block_grid[1]
    window_size = window_size_blocks * block_size
    steps = (grid_rows - window_size_blocks + 1, grid_cols - window_size_blocks + 1)

    windows, coords = extract_windows(image_path, window_size, block_size, steps)
    print(windows.shape)

    feature_maps = model.predict(preprocess_input(windows))
    print(feature_maps.shape)

    # One bucket per grid block, collecting every feature vector that lands on it.
    block_feats = {(row, col): [] for row in range(grid_rows) for col in range(grid_cols)}

    n_windows, map_rows, map_cols = feature_maps.shape[0], feature_maps.shape[1], feature_maps.shape[2]
    for win in range(n_windows):
        origin = coords[win]
        for d_row in range(map_rows):
            for d_col in range(map_cols):
                block_key = (int(origin[0] + d_row), int(origin[1] + d_col))
                block_feats[block_key].append(feature_maps[win, d_row, d_col, :])

    return [
        mean_local_feats(block_feats[(row, col)])
        for row in range(grid_rows)
        for col in range(grid_cols)
    ]


def main():
    """Score how well per-block VGG16 descriptors agree with the image labels.

    Steps:
      1. Build VGG16 (ImageNet weights, no classifier top, 224x224x3 input).
      2. Compute block descriptors for every labelled image with
         ``calc_image_descriptors``; each descriptor inherits its image's
         label and index.
      3. Index all descriptors with nmslib (HNSW, cosine similarity space).
      4. For each descriptor, walk its nearest neighbours in returned order,
         skip those from the same image, and look at the first
         ``default_neighbor_count`` remaining ones. The descriptor's score is
         the fraction of those that share its label.

    Returns:
        The mean score over all descriptors that had at least one neighbour
        from another image, or NaN when there is nothing to score (no
        images/descriptors, or only a single image).
    """
    model = vgg16.VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

    image_paths, image_labels = prepare_image_paths_labels()
    neighbor_count = default_neighbor_count

    descriptors = []
    labels = []   # label of the image each descriptor came from
    indexes = []  # index of the image each descriptor came from
    max_window_count = 0

    for i, (image_path, image_label) in enumerate(zip(image_paths, image_labels)):
        print(i, image_path)

        image_descriptors = calc_image_descriptors(image_path, model)
        window_count = len(image_descriptors)
        print("window_count", window_count)
        # Track the largest count rather than whatever the last image produced.
        max_window_count = max(max_window_count, window_count)

        descriptors.extend(image_descriptors)
        labels.extend([image_label] * window_count)
        indexes.extend([i] * window_count)

    if not descriptors:
        return float("nan")

    desc_index = nmslib.init(method='hnsw', space='cosinesimil')
    desc_index.addDataPointBatch(np.array(descriptors), np.arange(len(descriptors)))
    desc_index.createIndex({'post': 2}, print_progress=True)

    # Up to `max_window_count` of the closest hits (the query itself included)
    # can belong to the query's own image and get skipped, so ask for that
    # many extra results.
    k = neighbor_count + max_window_count

    scores = []

    for i, descriptor in enumerate(descriptors):
        idxs, _distances = desc_index.knnQuery(descriptor, k=k)

        count = 0       # neighbours from other images that share the label
        candidates = 0  # neighbours from other images examined so far

        for idx in idxs:
            if indexes[i] == indexes[idx]:
                continue

            candidates += 1

            if labels[i] == labels[idx]:
                count += 1

            if candidates >= neighbor_count:
                break

        # Record exactly one score per query, after the neighbour walk.
        # Appending inside the loop recorded partial ratios and never the
        # final one, because `break` ran before the append.
        if candidates:
            scores.append(count / candidates)

    if not scores:
        return float("nan")

    return sum(scores) / len(scores)

# Run the full evaluation; as the last expression of the cell, the returned score is displayed.
main()

In [None]:
# Sanity check: tile one sample image into a 15 x 14 grid of windows (default
# window size and stride) and display the shape of the resulting batch.
windows, coords = extract_windows("household_images/images/IMG_0175.jpg", steps=(15, 14))
windows.shape

0.461470852751206
without cropping: 0.46140807316304006
with padding: 0.44942283282143464

In [None]:
from matplotlib import pyplot as plt
# Extract a single row of two windows (one stride apart) from a sample image.
windows, coords = extract_windows("household_images/images/IMG_0175.jpg", steps=(1, 2))
print(windows.shape)
# Show the second window. The last axis is reversed because the pixels come from
# cv2.imread (BGR channel order) while matplotlib expects RGB; dividing by 255
# brings the float pixel values into the 0..1 range imshow uses for float images.
img2 = windows[1][:,:,::-1]/255
plt.imshow(img2)

In [None]:
from matplotlib import pyplot as plt
# Reproduce the window geometry that calc_image_descriptors derives from its
# default arguments (7-block windows, 32-pixel blocks, 21 x 28 block grid).
window_size_blocks = 7
block_size=32
block_grid=(21,28)
window_size = window_size_blocks * block_size
steps = (block_grid[0]-window_size_blocks+1, block_grid[1]-window_size_blocks+1)
stride = block_size
# Display the scaled-and-cropped image exactly as the window extractor sees it.
img = open_and_prepare_image("household_images/images/IMG_0175.jpg", window_size = window_size, stride=stride, steps=steps)
# Reverse the channel axis (cv2.imread loads BGR) so matplotlib shows the colours as RGB.
img2 = img[:,:,::-1]
plt.imshow(img2)